├── src_python
│   ├── ctf
│   │   ├── __init__.py
│   │   └── random.pyx
│   ├── Makefile
│   └── ctf_ext.h
├── .gitignore
├── studies
│   ├── Makefile
│   ├── fast_3mm.cxx
│   └── fast_diagram.cxx
├── scalapack_tests
│   ├── Makefile
│   ├── conj.h
│   └── qr.cxx
├── src
│   ├── scripts
│   │   ├── license.sh
│   │   ├── expand_includes.sh
│   │   ├── manual_readlink.sh
│   │   └── recursive_expand_includes.sh
│   ├── Makefile
│   ├── sparse_formats
│   │   ├── Makefile
│   │   └── coo.h
│   ├── mapping
│   │   ├── Makefile
│   │   └── distribution.h
│   ├── shared
│   │   ├── memcontrol.h
│   │   ├── fompi_wrapper.h
│   │   ├── Makefile
│   │   ├── int_timer.h
│   │   ├── init_models.h
│   │   ├── init_models.cxx
│   │   ├── blas_symbs.cxx
│   │   └── offload.h
│   ├── symmetry
│   │   ├── Makefile
│   │   └── sym_indices.h
│   ├── redistribution
│   │   ├── Makefile
│   │   ├── dgtog_redist.h
│   │   ├── dgtog_redist.cxx
│   │   ├── nosym_transp.h
│   │   └── glb_cyclic_reshuffle.h
│   ├── scaling
│   │   ├── Makefile
│   │   ├── sym_seq_scl.h
│   │   ├── scale_tsr.h
│   │   └── scaling.h
│   ├── interface
│   │   ├── flop_counter.cxx
│   │   ├── ring.cxx
│   │   ├── Makefile
│   │   ├── partition.h
│   │   ├── scalar.h
│   │   ├── decomposition.h
│   │   ├── scalar.cxx
│   │   ├── group.h
│   │   ├── fun_term.h
│   │   ├── partition.cxx
│   │   ├── monoid.cxx
│   │   ├── vector.cxx
│   │   ├── back_comp.h
│   │   ├── ring.h
│   │   ├── timer.h
│   │   ├── vector.h
│   │   ├── sparse_tensor.cxx
│   │   └── sparse_tensor.h
│   ├── summation
│   │   ├── Makefile
│   │   └── sym_seq_sum.h
│   ├── tensor
│   │   └── Makefile
│   └── contraction
│       ├── Makefile
│       ├── sp_seq_ctr.h
│       ├── spctr_comm.h
│       ├── ctr_offload.h
│       └── spctr_offload.h
├── include
│   └── ctf.hpp
├── test
│   ├── Makefile
│   ├── ccsdt_map_test.cxx
│   ├── reduce_bcast.cxx
│   ├── python
│   │   └── test_dot.py
│   ├── diag_ctr.cxx
│   ├── endomorphism.cxx
│   ├── univar_function.cxx
│   ├── multi_tsr_sym.cxx
│   ├── diag_sym.cxx
│   ├── repack.cxx
│   ├── readall_test.cxx
│   ├── speye.cxx
│   ├── bivar_function.cxx
│   ├── sy_times_ns.cxx
│   ├── endomorphism_cust_sp.cxx
│   ├── sptensor_sum.cxx
│   ├── dft.cxx
│   ├── bivar_transform.cxx
│   └── endomorphism_cust.cxx
├── examples
│   ├── Makefile
│   ├── btwn_central.h
│   ├── moldynamics.h
│   ├── spmv.cxx
│   ├── checkpoint.cxx
│   ├── particle_interaction.cxx
│   ├── spectral_element.cxx
│   ├── dft_3D.cxx
│   ├── trace.cxx
│   ├── jacobi.cxx
│   ├── sssp.cxx
│   └── scan.cxx
├── bench
│   ├── Makefile
│   ├── model_trainer_kernels.cxx
│   └── bench_contraction.cxx
├── license.txt
└── .travis.yml

/src_python/ctf/__init__.py:
--------------------------------------------------------------------------------
1 | from ctf.core import *
2 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Build directories
2 | lib
3 | lib_shared
4 | lib_python
5 | obj
6 | obj_ext
7 | obj_shared
8 | bin
9 | 
10 | # Autogenerated in first invocation of configure
11 | config.mk
12 | setup.py
13 | how-did-i-configure
14 | .*.swp
15 | .*.swo
--------------------------------------------------------------------------------
/studies/Makefile:
--------------------------------------------------------------------------------
1 | include $(BDIR)/config.mk
2 | 
3 | 
4 | .PHONY: $(STUDIES)
5 | $(STUDIES): %: $(BDIR)/bin/%
6 | 
7 | $(BDIR)/bin/%: %.cxx $(BDIR)/lib/libctf.a *.cxx ../studies/*.cxx Makefile ../Makefile
8 | $(FCXX) $< -o $@ -I../include/ -L$(BDIR)/lib -lctf $(LIBS)
9 | 
10 | 
--------------------------------------------------------------------------------
/scalapack_tests/Makefile:
--------------------------------------------------------------------------------
1 | include $(BDIR)/config.mk
2 | 
3 | .PHONY:
4 | $(SCALAPACK_TESTS): %: $(BDIR)/bin/%
5 | 
6 | $(BDIR)/bin/%: %.cxx $(BDIR)/lib/libctf.a *.cxx Makefile ../Makefile ../src/interface
7 | $(FCXX) $< -o $@ -I../include/ 
-L$(BDIR)/lib -lctf $(LIB_SCLPCK) $(LIBS) 8 | 9 | -------------------------------------------------------------------------------- /src/scripts/license.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | for file in ../*/*.hxx ../*/*.cxx ../*/*.h 4 | do 5 | if grep MERCH $file 6 | then 7 | vim $file -c ":d22" -c ":%s/\ \*\ SUCH\ DAMAGE\.\ \*\//\/*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*\//g" -c ":wq" 8 | fi 9 | done 10 | -------------------------------------------------------------------------------- /src/scripts/expand_includes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | REL_SCRIPT_DIR=$(dirname $0) 3 | SCRIPT=$(${REL_SCRIPT_DIR}/manual_readlink.sh $0) 4 | SCRIPT_DIR=$(dirname $SCRIPT) 5 | touch ${SCRIPT_DIR}/visited_list.txt 6 | $SCRIPT_DIR/recursive_expand_includes.sh $SCRIPT_DIR/../../include/ctf.hpp &> $SCRIPT_DIR/../../include/ctf_all.hpp 7 | rm -f ${SCRIPT_DIR}/visited_list.txt 8 | -------------------------------------------------------------------------------- /src/Makefile: -------------------------------------------------------------------------------- 1 | ctf: subdirs 2 | 3 | SUBDIRS = interface shared tensor symmetry mapping redistribution scaling summation contraction sparse_formats 4 | 5 | .PHONY: subdirs $(SUBDIRS) 6 | 7 | subdirs: $(SUBDIRS) 8 | 9 | $(SUBDIRS): 10 | $(MAKE) -C $@ 11 | 12 | clean: 13 | for dir in $(SUBDIRS) ; do \ 14 | $(MAKE) $@ -C $$dir ; \ 15 | done 16 | 17 | -------------------------------------------------------------------------------- /src/sparse_formats/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = coo.o csr.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | #%d | r ! grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ../../Makefile $(BDIR)/config.mk ../tensor/algstrct.h ../shared/util.h 6 | 7 | ctf: $(OBJS) 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | 13 | clean: 14 | rm -f *.o 15 | -------------------------------------------------------------------------------- /src/mapping/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = mapping.o distribution.o topology.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | ctf: $(OBJS) 5 | 6 | #%d | r ! 
grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 7 | HDRS = ../../Makefile $(BDIR)/config.mk ../interface/common.h ../mapping/mapping.h ../shared/util.h ../summation/sum_tsr.h ../tensor/untyped_tensor.h 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | -------------------------------------------------------------------------------- /src/shared/memcontrol.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #ifndef __MEMCONTROL_H__ 4 | #define __MEMCONTROL_H__ 5 | 6 | namespace CTF_int { 7 | void inc_tot_mem_used(int64_t a); 8 | int64_t proc_bytes_used(); 9 | int64_t proc_bytes_total(); 10 | int64_t proc_bytes_available(); 11 | void set_memcap(double cap); 12 | void set_mem_size(int64_t size); 13 | int get_num_instances(); 14 | } 15 | 16 | 17 | #endif 18 | -------------------------------------------------------------------------------- /src/symmetry/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = sym_indices.o symmetrization.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | #%d | r ! grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ../../Makefile $(BDIR)/config.mk ../contraction/contraction.h ../interface/common.h ../interface/timer.h ../scaling/scaling.h ../shared/util.h ../summation/summation.h ../tensor/untyped_tensor.h 6 | 7 | ctf: $(OBJS) 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | -------------------------------------------------------------------------------- /src/shared/fompi_wrapper.h: -------------------------------------------------------------------------------- 1 | #ifndef __FOMPI_WRAPPER__ 2 | #define __FOMPI_WRAPPER__ 3 | 4 | #ifdef USE_FOMPI 5 | #include 6 | 7 | typedef foMPI_Win CTF_Win; 8 | #define MPI_Init(...) foMPI_Init(__VA_ARGS__) 9 | #define MPI_Win_create(...) foMPI_Win_create(__VA_ARGS__) 10 | #define MPI_Win_fence(...) foMPI_Win_fence(__VA_ARGS__) 11 | #define MPI_Win_free(...) foMPI_Win_free(__VA_ARGS__) 12 | #define MPI_Put(...) foMPI_Put(__VA_ARGS__) 13 | #else 14 | #include 15 | typedef MPI_Win CTF_Win; 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /src/redistribution/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = redist.o sparse_rw.o pad.o nosym_transp.o cyclic_reshuffle.o glb_cyclic_reshuffle.o dgtog_redist.o dgtog_calc_cnt.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | ctf: $(OBJS) 5 | 6 | #%d | r ! grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 7 | HDRS = ../../Makefile $(BDIR)/config.mk ../interface/common.h ../mapping/distribution.h ../shared/util.h ../tensor/algstrct.h ../shared/model.h ../shared/init_models.h 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | 13 | clean: 14 | rm -f *.o 15 | -------------------------------------------------------------------------------- /src/shared/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = util.o memcontrol.o int_timer.o model.o init_models.o blas_symbs.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | #%d | r ! 
grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ../../Makefile $(BDIR)/config.mk ../interface/common.h ../interface/timer.h 6 | 7 | LNVCC_OBJS = offload.o 8 | NVCC_OBJS = $(addprefix $(ODIR)/, $(LNVCC_OBJS)) 9 | 10 | ctf: $(OBJS) $(NVCC_OBJS) 11 | 12 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 13 | $(FCXX) -c $< -o $@ 14 | 15 | $(NVCC_OBJS): $(ODIR)/%.o: %.cu *.h $(HDRS) 16 | $(OFFLOAD_CXX) -c $< -o $@ 17 | 18 | -------------------------------------------------------------------------------- /include/ctf.hpp: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | #ifndef __CTF_HPP__ 3 | #define __CTF_HPP__ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | #define CTF_VERSION 150 16 | 17 | #include "../src/interface/tensor.h" 18 | #include "../src/interface/idx_tensor.h" 19 | #include "../src/interface/timer.h" 20 | #include "../src/interface/back_comp.h" 21 | #include "../src/interface/kernel.h" 22 | 23 | #endif 24 | 25 | -------------------------------------------------------------------------------- /src_python/ctf/random.pyx: -------------------------------------------------------------------------------- 1 | import os, sys 2 | sys.path.insert(0, os.path.abspath(".")) 3 | 4 | cdef extern from "ctf.hpp" namespace "CTF_int": 5 | void init_rng(int seed) 6 | 7 | cdef extern from "ctf.hpp" namespace "CTF": 8 | cdef cppclass World: 9 | int rank, np; 10 | World() 11 | World(int) 12 | World & get_universe() 13 | 14 | def seed(seed): 15 | init_rng(seed+get_universe().rank) 16 | 17 | def all_seed(seed): 18 | init_rng(seed) 19 | 20 | def random(shape): 21 | import ctf 22 | A = ctf.tensor(shape) 23 | A.fill_random() 24 | return A 25 | 26 | -------------------------------------------------------------------------------- /test/Makefile: -------------------------------------------------------------------------------- 1 | include $(BDIR)/config.mk 2 | 3 | 4 | .PHONY: 5 | $(TESTS): %: $(BDIR)/bin/% 6 | 7 | ifneq (,$(findstring DUSE_SCALAPACK,$(DEFS))) 8 | SCALA_TESTS = pgemm_test nonsq_pgemm_test 9 | $(SCALA_TESTS): %: $(ODIR)/bin/% 10 | endif 11 | 12 | $(BDIR)/bin/%: %.cxx $(BDIR)/lib/libctf.a *.cxx ../examples/*.cxx Makefile ../Makefile $(ODIR)/btwn_central_kernels.o 13 | $(FCXX) $< $(ODIR)/btwn_central_kernels.o -o $@ -I../include/ -L$(BDIR)/lib -lctf $(LIBS) 14 | 15 | $(ODIR)/btwn_central_kernels.o: ../examples/btwn_central_kernels.cxx ../examples/btwn_central.h 16 | $(OFFLOAD_CXX) -c $< -o $@ -I../include/ 17 | -------------------------------------------------------------------------------- /src/scaling/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = scaling.o sym_seq_scl.o scale_tsr.o strp_tsr.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | ctf: $(OBJS) 5 | 6 | #%d | r ! 
grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 7 | HDRS = ../../Makefile $(BDIR)/config.mk ../contraction/ctr_comm.h ../interface/common.h ../interface/idx_tensor.h ../interface/term.h ../mapping/distribution.h ../mapping/mapping.h ../scaling/scale_tsr.h ../shared/iter_tsr.h ../shared/memcontrol.h ../shared/util.h ../summation/sum_tsr.h ../tensor/algstrct.h ../tensor/untyped_tensor.h 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | -------------------------------------------------------------------------------- /examples/Makefile: -------------------------------------------------------------------------------- 1 | 2 | .PHONY: 3 | $(EXAMPLES): %: $(BDIR)/bin/% 4 | $(BDIR)/bin/btwn_central: btwn_central.cxx btwn_central_kernels.cxx $(ODIR)/btwn_central_kernels.o $(BDIR)/lib/libctf.a Makefile ../Makefile 5 | $(FCXX) $< $(ODIR)/btwn_central_kernels.o -o $@ -I../include/ -L$(BDIR)/lib -lctf $(LIBS) 6 | 7 | $(ODIR)/btwn_central_kernels.o: btwn_central_kernels.cxx btwn_central.h ../src/interface 8 | $(OFFLOAD_CXX) -c $< -o $@ -I../include/ 9 | 10 | 11 | $(BDIR)/bin/%: %.cxx $(BDIR)/lib/libctf.a Makefile ../Makefile ../src/interface 12 | $(FCXX) $< -o $@ -I../include/ -L$(BDIR)/lib -lctf $(LIBS) 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/interface/flop_counter.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #include "timer.h" 4 | #include "common.h" 5 | 6 | namespace CTF { 7 | Flop_counter::Flop_counter(){ 8 | start_count = CTF_int::get_flops(); 9 | } 10 | 11 | Flop_counter::~Flop_counter(){ 12 | } 13 | 14 | void Flop_counter::zero(){ 15 | start_count = CTF_int::get_flops(); 16 | } 17 | 18 | int64_t Flop_counter::count(MPI_Comm comm){ 19 | int64_t allf; 20 | int64_t myf = (CTF_int::get_flops() - start_count); 21 | MPI_Allreduce(&myf,&allf,1,MPI_INT64_T,MPI_SUM,comm); 22 | return allf; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/interface/ring.cxx: -------------------------------------------------------------------------------- 1 | #include "../../include/ctf.hpp" 2 | 3 | 4 | namespace CTF_int { 5 | CTF::Ring float_ring = CTF::Ring(); 6 | CTF_int::algstrct const * get_float_ring(){ 7 | return &float_ring; 8 | } 9 | CTF::Ring double_ring = CTF::Ring(); 10 | CTF_int::algstrct const * get_double_ring(){ 11 | return &double_ring; 12 | } 13 | CTF::Ring int_ring = CTF::Ring(); 14 | CTF_int::algstrct const * get_int_ring(){ 15 | return &int_ring; 16 | } 17 | CTF::Ring int64_t_ring = CTF::Ring(); 18 | CTF_int::algstrct const * get_int64_t_ring(){ 19 | return &int64_t_ring; 20 | } 21 | } 22 | 23 | 24 | -------------------------------------------------------------------------------- /src/scripts/manual_readlink.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # taken from https://stackoverflow.com/questions/1055671/how-can-i-get-the-behavior-of-gnus-readlink-f-on-a-mac 3 | 4 | TARGET_FILE=$1 5 | 6 | cd `dirname $TARGET_FILE` 7 | TARGET_FILE=`basename $TARGET_FILE` 8 | 9 | # Iterate down a (possible) chain of symlinks 10 | while [ -L "$TARGET_FILE" ] 11 | do 12 | TARGET_FILE=`readlink $TARGET_FILE` 13 | cd `dirname $TARGET_FILE` 14 | TARGET_FILE=`basename $TARGET_FILE` 15 | done 16 | 17 | # Compute the canonicalized name by finding the physical path 18 | # for the directory we're in and 
appending the target file. 19 | PHYS_DIR=`pwd -P` 20 | RESULT=$PHYS_DIR/$TARGET_FILE 21 | echo $RESULT 22 | -------------------------------------------------------------------------------- /src/scripts/recursive_expand_includes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | REL_SCRIPT_DIR=$(dirname $0) 3 | SCRIPT=$(${REL_SCRIPT_DIR}/manual_readlink.sh $0) 4 | SCRIPT_DIR=$(dirname $SCRIPT) 5 | DIR=$(pwd) 6 | cd $(dirname $1) 7 | FNAME=$(basename $1) 8 | FDIR=$(pwd) 9 | FULLFNAME="${FDIR}/${FNAME}" 10 | if grep -Fxq "$FULLFNAME" $SCRIPT_DIR/visited_list.txt 11 | then 12 | exit 0 13 | else 14 | echo $FULLFNAME >> $SCRIPT_DIR/visited_list.txt 15 | TMP_FILE="${FNAME}.tmp.concat" 16 | cp $FNAME $TMP_FILE 17 | sed -i -e 's/#include "\(.*\)"/include \1/g' $TMP_FILE 18 | awk ' 19 | $1=="include" && NF>=2 { 20 | system("'$SCRIPT' " $2) 21 | next 22 | } 23 | {print}' "$TMP_FILE" 24 | rm $TMP_FILE 25 | fi 26 | -------------------------------------------------------------------------------- /src/summation/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = summation.o sym_seq_sum.o sum_tsr.o spr_seq_sum.o spsum_tsr.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | #%d | r ! grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ../../Makefile $(BDIR)/config.mk ../mapping/distribution.h ../mapping/mapping.h ../redistribution/nosym_transp.h ../redistribution/redist.h ../scaling/scaling.h ../scaling/strp_tsr.h ../shared/iter_tsr.h ../shared/memcontrol.h ../shared/util.h ../symmetry/sym_indices.h ../symmetry/symmetrization.h ../tensor/algstrct.h ../tensor/untyped_tensor.h ../shared/model.h ../shared/init_models.h 6 | 7 | ctf: $(OBJS) 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | 13 | clean: 14 | rm -f *.o 15 | -------------------------------------------------------------------------------- /scalapack_tests/conj.h: -------------------------------------------------------------------------------- 1 | #ifndef __CONJ_H__ 2 | #define __CONJ_H__ 3 | 4 | template 5 | CTF::Matrix conj(CTF::Matrix & A){ 6 | return A; 7 | } 8 | template <> 9 | CTF::Matrix< std::complex > conj(CTF::Matrix< std::complex > & A){ 10 | CTF::Matrix< std::complex > B(A); 11 | B["ij"] = CTF::Function< std::complex>([](std::complex a){ return std::conj(a); })(A["ij"]); 12 | return B; 13 | } 14 | template <> 15 | CTF::Matrix> conj(CTF::Matrix> & A){ 16 | CTF::Matrix> B(A); 17 | B["ij"] = CTF::Function>([](std::complex a){ return std::conj(a); })(A["ij"]); 18 | return B; 19 | } 20 | #endif 21 | -------------------------------------------------------------------------------- /src/interface/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = common.o flop_counter.o world.o idx_tensor.o term.o schedule.o semiring.o partition.o fun_term.o monoid.o set.o ring.o 2 | 3 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 4 | 5 | 6 | #%d | r ! 
grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 7 | HDRS = ../../Makefile $(BDIR)/config.mk ../contraction/contraction.h ../../include/ctf.hpp ../interface/common.h ../mapping/topology.h ../scaling/scaling.h ../shared/blas_symbs.h ../shared/memcontrol.h ../shared/util.h ../summation/summation.h ../tensor/algstrct.h ../tensor/untyped_tensor.h ../tensor/untyped_tensor_tmpl.h ../sparse_formats/csr.h ../shared/lapack_symbs.h 8 | 9 | ctf: $(OBJS) 10 | 11 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 12 | $(FCXX) -c $< -o $@ 13 | 14 | 15 | -------------------------------------------------------------------------------- /bench/Makefile: -------------------------------------------------------------------------------- 1 | include $(BDIR)/config.mk 2 | 3 | .PHONY: 4 | $(BENCHMARKS): %: $(BDIR)/bin/% 5 | 6 | 7 | ifneq (,$(findstring DUSE_SCALAPACK,$(DEFS))) 8 | SCALA_BENCHMARKS = nonsq_pgemm_bench 9 | $(SCALA_BENCHMARKS): %: $(BDIR)/bin/% 10 | endif 11 | 12 | 13 | $(ODIR)/model_trainer_kernels.o: model_trainer_kernels.cxx ../src/interface 14 | $(OFFLOAD_CXX) -c $< -o $@ -I../include/ 15 | 16 | $(BDIR)/bin/model_trainer: model_trainer.cxx $(ODIR)/model_trainer_kernels.o $(BDIR)/lib/libctf.a *.cxx Makefile ../Makefile ../examples/ccsd.cxx 17 | $(FCXX) $< $(ODIR)/model_trainer_kernels.o -o $@ -I../include/ -L$(BDIR)/lib -lctf $(LIBS) 18 | 19 | $(BDIR)/bin/%: %.cxx $(BDIR)/lib/libctf.a *.cxx Makefile ../Makefile 20 | $(FCXX) $< -o $@ -I../include/ -L$(BDIR)/lib -lctf $(LIBS) 21 | 22 | -------------------------------------------------------------------------------- /src/tensor/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = untyped_tensor.o algstrct.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | #%d | r ! grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ../../Makefile $(BDIR)/config.mk ../contraction/contraction.h ../interface/common.h ../interface/idx_tensor.h ../interface/partition.h ../interface/timer.h ../interface/world.h ../mapping/distribution.h ../mapping/mapping.h ../redistribution/cyclic_reshuffle.h ../redistribution/dgtog_redist.h ../redistribution/glb_cyclic_reshuffle.h ../redistribution/nosym_transp.h ../redistribution/pad.h ../redistribution/redist.h ../redistribution/sparse_rw.h ../shared/blas_symbs.h ../shared/memcontrol.h ../shared/util.h ../summation/summation.h 6 | 7 | ctf: $(OBJS) 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | -------------------------------------------------------------------------------- /src/contraction/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = contraction.o sym_seq_ctr.o ctr_offload.o ctr_comm.o ctr_tsr.o ctr_2d_general.o sp_seq_ctr.o spctr_tsr.o spctr_comm.o spctr_2d_general.o spctr_offload.o 2 | OBJS = $(addprefix $(ODIR)/, $(LOBJS)) 3 | 4 | #%d | r ! 
grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ../../Makefile $(BDIR)/config.mk ../interface/functions.h ../mapping/distribution.h ../mapping/mapping.h ../redistribution/nosym_transp.h ../redistribution/redist.h ../scaling/strp_tsr.h ../shared/iter_tsr.h ../shared/memcontrol.h ../shared/offload.h ../shared/util.h ../symmetry/sym_indices.h ../symmetry/symmetrization.h ../tensor/algstrct.h ../tensor/untyped_tensor.h ../shared/model.h ../shared/init_models.h ../sparse_formats/coo.h ../sparse_formats/csr.h 6 | 7 | ctf: $(OBJS) 8 | 9 | $(OBJS): $(ODIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | 13 | -------------------------------------------------------------------------------- /src_python/Makefile: -------------------------------------------------------------------------------- 1 | LOBJS = ctf_ext.o 2 | OBJS = $(addprefix $(OEDIR)/, $(LOBJS)) 3 | 4 | #%d | r ! grep -ho "\.\..*\.h" *.cxx *.h | sort | uniq 5 | HDRS = ctf_ext.h ../Makefile $(BDIR)/config.mk ../src/contraction/contraction.h ../src/interface/common.h ../src/interface/idx_tensor.h ../src/interface/partition.h ../src/interface/timer.h ../src/interface/world.h ../src/mapping/distribution.h ../src/mapping/mapping.h ../src/redistribution/cyclic_reshuffle.h ../src/redistribution/dgtog_redist.h ../src/redistribution/glb_cyclic_reshuffle.h ../src/redistribution/nosym_transp.h ../src/redistribution/pad.h ../src/redistribution/redist.h ../src/redistribution/sparse_rw.h ../src/shared/blas_symbs.h ../src/shared/memcontrol.h ../src/shared/util.h ../src/summation/summation.h 6 | 7 | ctf_ext_objs: $(OBJS) 8 | 9 | $(OBJS): $(OEDIR)/%.o: %.cxx *.h $(HDRS) 10 | $(FCXX) -c $< -o $@ 11 | 12 | -------------------------------------------------------------------------------- /src/interface/partition.h: -------------------------------------------------------------------------------- 1 | #ifndef __PARTITION_H__ 2 | #define __PARTITION_H__ 3 | 4 | 5 | namespace CTF { 6 | 7 | /** 8 | * \defgroup CTF_part Partition/Decomposition interface 9 | * \addtogroup CTF_part 10 | * @{ 11 | */ 12 | class Idx_Partition; 13 | 14 | class Partition { 15 | public: 16 | int order; 17 | int * lens; 18 | 19 | Partition(int order, int const * lens); 20 | ~Partition(); 21 | Partition(Partition const & other); 22 | Partition(); 23 | 24 | Idx_Partition operator[](char const * idx); 25 | void operator=(Partition const & other); 26 | }; 27 | 28 | class Idx_Partition { 29 | public: 30 | Partition part; 31 | char * idx; 32 | Idx_Partition(); 33 | ~Idx_Partition(); 34 | Idx_Partition(Partition const & part, char const * idx); 35 | 36 | /** 37 | * \brief extracts non-trivial part of partition by ommitting unit dimensions 38 | * \return new partition with all dimensions non-unit 39 | */ 40 | Idx_Partition reduce_order() const; 41 | }; 42 | 43 | /** 44 | * @} 45 | */ 46 | } 47 | 48 | #endif 49 | -------------------------------------------------------------------------------- /examples/btwn_central.h: -------------------------------------------------------------------------------- 1 | #ifndef __BTWN_CENTRAL_H__ 2 | #define __BTWN_CENTRAL_H__ 3 | 4 | #include 5 | 6 | #ifdef __CUDACC__ 7 | #define DEVICE __device__ 8 | #define HOST __host__ 9 | #else 10 | #define DEVICE 11 | #define HOST 12 | #endif 13 | 14 | 15 | //structure for regular path that keeps track of the multiplicity of paths 16 | class mpath { 17 | public: 18 | int w; // weighted distance 19 | int m; // multiplictiy 20 | DEVICE HOST 21 | mpath(int w_, int m_){ w=w_; m=m_; } 22 | DEVICE HOST 23 | 
mpath(mpath const & p){ w=p.w; m=p.m; } 24 | DEVICE HOST 25 | mpath(){ w=0; m=0;}; 26 | }; 27 | 28 | //path with a centrality score 29 | class cpath { 30 | public: 31 | double c; // centrality score 32 | float m; 33 | int w; 34 | DEVICE HOST 35 | cpath(int w_, float m_, double c_){ w=w_; m=m_; c=c_;} 36 | DEVICE HOST 37 | cpath(cpath const & p){ w=p.w; m=p.m; c=p.c; } 38 | cpath(){ c=0.0; m=0.0; w=0;}; 39 | }; 40 | 41 | 42 | // min Monoid for cpath structure 43 | CTF::Monoid get_cpath_monoid(); 44 | 45 | //(min, +) tropical semiring for mpath structure 46 | CTF::Semiring get_mpath_semiring(); 47 | 48 | CTF::Bivar_Function * get_Bellman_kernel(); 49 | 50 | CTF::Bivar_Function * get_Brandes_kernel(); 51 | #endif 52 | -------------------------------------------------------------------------------- /src/shared/int_timer.h: -------------------------------------------------------------------------------- 1 | #ifndef __INT_TIMER_H__ 2 | #define __INT_TIMER_H__ 3 | 4 | namespace CTF { 5 | /** 6 | * \defgroup timer Timing and cost measurement 7 | * \addtogroup timer 8 | * @{ 9 | */ 10 | void set_main_args(int argc, const char * const * argv); 11 | 12 | /** 13 | * @} 14 | */ 15 | 16 | } 17 | #ifdef PROFILE 18 | #define TAU 19 | #endif 20 | 21 | #ifdef TAU 22 | #define TAU_FSTART(ARG) \ 23 | do { CTF::Timer t(#ARG); t.start(); } while (0); 24 | 25 | #define TAU_FSTOP(ARG) \ 26 | do { CTF::Timer t(#ARG); t.stop(); } while (0); 27 | 28 | #define TAU_PROFILE_TIMER(ARG1, ARG2, ARG3, ARG4) 29 | 30 | #define TAU_PROFILE_INIT(argc, argv) \ 31 | CTF::set_main_args(argc, argv); 32 | 33 | #define TAU_PROFILE_SET_NODE(ARG) 34 | 35 | #define TAU_PROFILE_START(ARG) \ 36 | CTF::Timer __CTF::Timer##ARG(#ARG); 37 | 38 | #define TAU_PROFILE_STOP(ARG) \ 39 | __CTF::Timer##ARG.stop(); 40 | 41 | #define TAU_PROFILE_SET_CONTEXT(ARG) \ 42 | if (ARG==0) CTF::set_context(MPI_COMM_WORLD); \ 43 | else CTF::set_context((MPI_Comm)ARG); 44 | #endif 45 | 46 | 47 | #endif 48 | 49 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | /* Copyright (c) 2011, Edgar Solomonik> 2 | * All rights reserved. 3 | * 4 | * Redistribution and use in source and binary forms, with or without 5 | * modification, are permitted provided that the following 6 | * conditions are met: 7 | * * Redistributions of source code must retain the above copyright 8 | * notice, this list of conditions and the following disclaimer. 9 | * * Redistributions in binary form must reproduce the above copyright 10 | * notice, this list of conditions and the following disclaimer in the 11 | * documentation and/or other materials provided with the distribution. 12 | * 13 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 14 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 | * ARE DISCLAIMED. 
IN NO EVENT SHALL EDGAR SOLOMONIK BE LIABLE FOR ANY 17 | * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 | * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 19 | * SERVICES LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 20 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 | * LIABILITY, OR TORT(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 | * SUCH DAMAGE. */ 24 | -------------------------------------------------------------------------------- /src/contraction/sp_seq_ctr.h: -------------------------------------------------------------------------------- 1 | #ifndef __SP_SEQ_CTR_H__ 2 | #define __SP_SEQ_CTR_H__ 3 | 4 | #include "contraction.h" 5 | namespace CTF_int{ 6 | void spA_dnB_dnC_seq_ctr(char const * alpha, 7 | char const * A, 8 | int64_t size_A, 9 | algstrct const * sr_A, 10 | int order_A, 11 | int const * edge_len_A, 12 | int const * sym_A, 13 | int const * idx_map_A, 14 | char const * B, 15 | algstrct const * sr_B, 16 | int order_B, 17 | int const * edge_len_B, 18 | int const * sym_B, 19 | int const * idx_map_B, 20 | char const * beta, 21 | char * C, 22 | algstrct const * sr_C, 23 | int order_C, 24 | int const * edge_len_C, 25 | int const * sym_C, 26 | int const * idx_map_C, 27 | bivar_function const * func); 28 | } 29 | #endif 30 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | os: linux 3 | language: python 4 | python: 5 | - "2.7" 6 | - "3.5" 7 | - "3.6" 8 | env: 9 | - CTF_CXX=clang++ 10 | - CTF_CXX=g++ 11 | 12 | addons: 13 | apt: 14 | sources: 15 | - ubuntu-toolchain-r-test 16 | packages: 17 | - gcc-5 18 | - g++-5 19 | - gfortran-5 20 | - libgfortran-5-dev 21 | - libblas-dev 22 | - liblapack-dev 23 | - mpich2 24 | - libmpich2-dev 25 | - cmake 26 | before_install: 27 | - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]] || [[ "$TRAVIS_PYTHON_VERSION" == "2.6" ]] ; then 28 | export PYTHONMAJORV=2; 29 | else 30 | export PYTHONMAJORV=3; 31 | fi 32 | - wget https://repo.continuum.io/miniconda/Miniconda${PYTHONMAJORV}-latest-Linux-x86_64.sh 33 | - bash Miniconda${PYTHONMAJORV}-latest-Linux-x86_64.sh -b 34 | - export PATH=$HOME/miniconda${PYTHONMAJORV}/bin:$PATH 35 | - conda create -y -n mypy python=$TRAVIS_PYTHON_VERSION 36 | - source activate mypy 37 | - conda install -y cython 38 | - conda install -y numpy nomkl blas=*=openblas 39 | install: 40 | - $CTF_CXX --version 41 | - FC=gfortran-5 ./configure CXX="mpicxx -cxx=$CTF_CXX" --build-hptt --build-scalapack 42 | - make -j2 43 | - make python -j2 44 | script: 45 | - make test 46 | - make test2 47 | - make python_test 48 | - make python_test2 49 | after_failure: 50 | notifications: 51 | email: 52 | recipients: 53 | - solomon2@illinois.edu 54 | on_success: change 55 | on_failure: always 56 | 57 | 58 | -------------------------------------------------------------------------------- /src/interface/scalar.h: -------------------------------------------------------------------------------- 1 | #ifndef __SCALAR_H__ 2 | #define __SCALAR_H__ 3 | namespace CTF { 4 | 5 | /** 6 | * \addtogroup CTF 7 | * @{ 8 | **/ 9 | /** 10 | * \brief Scalar class which encapsulates a 0D tensor 11 | */ 12 | template 13 | class Scalar : public Tensor { 14 | public: 15 | /** 16 | * \brief constructor for a scalar 17 | * \param[in] wrld CTF world 
where the tensor will live 18 | * \param[in] sr defines the tensor arithmetic for this tensor 19 | */ 20 | Scalar(World & wrld=get_universe(), 21 | CTF_int::algstrct const & sr=Ring()); 22 | 23 | /** 24 | * \brief constructor for a scalar with predefined value 25 | * \param[in] val scalar value 26 | * \param[in] wrld CTF world where the tensor will live 27 | * \param[in] sr defines the tensor arithmetic for this tensor 28 | */ 29 | Scalar(dtype val, 30 | World & wrld=get_universe(), 31 | CTF_int::algstrct const & sr=Ring()); 32 | 33 | /** 34 | * \brief returns scalar value 35 | */ 36 | dtype get_val(); 37 | 38 | /** 39 | * \brief sets scalar value 40 | */ 41 | void set_val(dtype val); 42 | 43 | /** 44 | * \brief casts into a dtype value 45 | */ 46 | operator dtype() { return get_val(); } 47 | 48 | Scalar & operator=(const Scalar & A); 49 | 50 | }; 51 | 52 | /** 53 | * @} 54 | */ 55 | } 56 | #include "scalar.cxx" 57 | #endif 58 | -------------------------------------------------------------------------------- /src/interface/decomposition.h: -------------------------------------------------------------------------------- 1 | #ifndef __DECOMPOSITION_H__ 2 | #define __DECOMPOSITION_H__ 3 | #include "tensor.h" 4 | #include "matrix.h" 5 | #include "vector.h" 6 | namespace CTF { 7 | 8 | void fold_unfold(Tensor& X, Tensor& Y); 9 | 10 | template 11 | class Decomposition { 12 | public: 13 | /** 14 | * \brief associated an index map with the tensor decomposition for algebra 15 | * \param[in] idx_map index assignment for this tensor 16 | */ 17 | virtual Contract_Term operator[](char const * idx_map) = 0; 18 | }; 19 | 20 | template 21 | class HoSVD : public Decomposition { 22 | public: 23 | Tensor core_tensor; 24 | std::vector< Matrix > factor_matrices; 25 | 26 | /** 27 | * \calculate higher order singular value decomposition of a tensor 28 | * \param[in] ranks ranks(dimensions) of the core tensor and factor matrices 29 | */ 30 | HoSVD(Tensor T, int * ranks); 31 | 32 | /** 33 | * \calculate initialize a higher order singular value decomposition of a tensor to zero 34 | * \param[in] lens ranks(dimensions) of the factored tensor 35 | * \param[in] ranks ranks(dimensions) of the core tensor and factor matrices 36 | */ 37 | HoSVD(int * lens, int * ranks); 38 | 39 | /** 40 | * \brief associated an index map with the tensor decomposition for algebra 41 | * \param[in] idx_map index assignment for this tensor 42 | */ 43 | Contract_Term operator[](char const * idx_map); 44 | 45 | }; 46 | 47 | } 48 | 49 | #endif 50 | -------------------------------------------------------------------------------- /src/interface/scalar.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | #include "common.h" 3 | 4 | namespace CTF { 5 | 6 | template 7 | Scalar::Scalar(World & world_, CTF_int::algstrct const & sr_) : 8 | Tensor(0, 0, NULL, NULL, world_, sr_) { 9 | 10 | } 11 | 12 | template 13 | Scalar::Scalar(dtype val, 14 | World & world, 15 | CTF_int::algstrct const & sr_) 16 | : Tensor(0, 0, NULL, NULL, world, sr_) { 17 | int64_t s; 18 | dtype * arr; 19 | 20 | if (world.cdt.rank == 0){ 21 | arr = this->get_raw_data(&s); 22 | arr[0] = val; 23 | } 24 | } 25 | 26 | 27 | template 28 | dtype Scalar::get_val(){ 29 | int64_t s; 30 | dtype * datap; 31 | dtype val; 32 | datap = this->get_raw_data(&s); 33 | memcpy(&val, datap, sizeof(dtype)); 34 | MPI_Bcast((char *)&val, sizeof(dtype), MPI_CHAR, 0, this->wrld->comm); 35 | return val; 36 | 
} 37 | 38 | template 39 | void Scalar::set_val(dtype const val){ 40 | int64_t s; 41 | dtype * arr; 42 | if (this->world->ctf->get_rank() == 0){ 43 | arr = this->world->ctf->get_raw_data(&s); 44 | arr[0] = val; 45 | } 46 | } 47 | 48 | template 49 | Scalar & Scalar::operator=(const Scalar & A){ 50 | CTF_int::tensor::free_self(); 51 | CTF_int::tensor::init(A.sr, A.order, A.lens, A.sym, A.wrld, 1, A.name, A.profile, A.is_sparse); 52 | return *this; 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/redistribution/dgtog_redist.h: -------------------------------------------------------------------------------- 1 | 2 | #include "dgtog_calc_cnt.h" 3 | 4 | namespace CTF_int { 5 | /** 6 | * \brief estimates execution time, given this processor sends a receives tot_sz across np procs 7 | * \param[in] tot_sz amount of data sent/recved 8 | * \param[in] np number of procs involved 9 | */ 10 | double dgtog_est_time(int64_t tot_sz, int np); 11 | 12 | void dgtog_reshuffle(int const * sym, 13 | int const * edge_len, 14 | distribution const & old_dist, 15 | distribution const & new_dist, 16 | char ** ptr_tsr_data, 17 | char ** ptr_tsr_new_data, 18 | algstrct const * sr, 19 | CommData ord_glb_comm); 20 | 21 | void redist_bucket_r0(int * const * bucket_offset, 22 | int64_t * const * data_offset, 23 | int * const * ivmax_pre, 24 | int rep_phase0, 25 | int rep_idx0, 26 | int virt_dim0, 27 | bool data_to_buckets, 28 | char * __restrict__ data, 29 | char ** __restrict__ buckets, 30 | int64_t * counts, 31 | algstrct const * sr, 32 | int64_t data_off, 33 | int bucket_off, 34 | int prev_idx); 35 | 36 | } 37 | -------------------------------------------------------------------------------- /examples/moldynamics.h: -------------------------------------------------------------------------------- 1 | #ifndef __MOLDYNAMICS_H__ 2 | #define __MOLDYNAMICS_H__ 3 | 4 | class force { 5 | public: 6 | double fx; 7 | double fy; 8 | 9 | force operator-() const { 10 | force fnew; 11 | fnew.fx = -fx; 12 | fnew.fy = -fy; 13 | return fnew; 14 | } 15 | 16 | force operator+(force const & fother) const { 17 | force fnew; 18 | fnew.fx = fx+fother.fx; 19 | fnew.fy = fy+fother.fy; 20 | return fnew; 21 | } 22 | 23 | force(){ 24 | fx = 0.0; 25 | fy = 0.0; 26 | } 27 | 28 | // additive identity 29 | force(int){ 30 | fx = 0.0; 31 | fy = 0.0; 32 | } 33 | }; 34 | 35 | class particle { 36 | public: 37 | double dx; 38 | double dy; 39 | double coeff; 40 | int id; 41 | 42 | particle(){ 43 | dx = 0.0; 44 | dy = 0.0; 45 | coeff = 0.0; 46 | id = 0; 47 | } 48 | }; 49 | 50 | void acc_force(force f, particle & p){ 51 | p.dx += f.fx*p.coeff; 52 | p.dy += f.fy*p.coeff; 53 | } 54 | 55 | #ifdef __CUDACC__ 56 | __device__ __host__ 57 | #endif 58 | double get_distance(particle const & p, particle const & q){ 59 | return sqrt((p.dx-q.dx)*(p.dx-q.dx)+(p.dy-q.dy)*(p.dy-q.dy)); 60 | } 61 | 62 | #ifdef __CUDACC__ 63 | __device__ __host__ 64 | #endif 65 | force get_force(particle const p, particle const q){ 66 | force f; 67 | f.fx = (p.dx-q.dx)/std::pow(get_distance(p,q)+.01,3); 68 | f.fy = (p.dy-q.dy)/std::pow(get_distance(p,q)+.01,3); 69 | return f; 70 | } 71 | namespace CTF { 72 | template <> 73 | inline void Set::print(char const * a, FILE * fp) const { 74 | fprintf(fp,"(dx=%lf dy=%lf coeff=%lf id=%d)",((particle*)a)[0].dx,((particle*)a)[0].dy,((particle*)a)[0].coeff,((particle*)a)[0].id); 75 | } 76 | template <> 77 | inline void Set::print(char const * a, FILE * fp) const { 78 | fprintf(fp,"(fx=%lf 
fy=%lf)",((force*)a)[0].fx,((force*)a)[0].fy); 79 | } 80 | 81 | } 82 | 83 | 84 | #endif 85 | 86 | -------------------------------------------------------------------------------- /src/shared/init_models.h: -------------------------------------------------------------------------------- 1 | #ifndef __INIT_MODELS_H__ 2 | #define __INIT_MODELS_H__ 3 | namespace CTF_int{ 4 | extern double long_contig_transp_mdl_init[]; 5 | extern double shrt_contig_transp_mdl_init[]; 6 | extern double non_contig_transp_mdl_init[]; 7 | extern double alltoall_mdl_init[]; 8 | extern double alltoallv_mdl_init[]; 9 | extern double red_mdl_init[]; 10 | extern double red_mdl_cst_init[]; 11 | extern double csrred_mdl_init[]; 12 | extern double csrred_mdl_cst_init[]; 13 | extern double allred_mdl_init[]; 14 | extern double allred_mdl_cst_init[]; 15 | extern double bcast_mdl_init[]; 16 | extern double dgtog_res_mdl_init[]; 17 | extern double spredist_mdl_init[]; 18 | extern double blres_mdl_init[]; 19 | extern double pin_keys_mdl_init[]; 20 | extern double seq_tsr_ctr_mdl_cst_init[]; 21 | extern double seq_tsr_ctr_mdl_ref_init[]; 22 | extern double seq_tsr_ctr_mdl_inr_init[]; 23 | extern double seq_tsr_ctr_mdl_off_init[]; 24 | extern double seq_tsr_ctr_mdl_cst_inr_init[]; 25 | extern double seq_tsr_ctr_mdl_cst_off_init[]; 26 | extern double upload_mdl_init[]; 27 | extern double download_mdl_init[]; 28 | extern double seq_tsr_spctr_cst_off_k0_init[]; 29 | extern double seq_tsr_spctr_cst_off_k1_init[]; 30 | extern double seq_tsr_spctr_cst_off_k2_init[]; 31 | extern double seq_tsr_spctr_cst_k0_init[]; 32 | extern double seq_tsr_spctr_cst_k1_init[]; 33 | extern double seq_tsr_spctr_cst_k2_init[]; 34 | extern double seq_tsr_spctr_cst_k3_init[]; 35 | extern double seq_tsr_spctr_cst_k4_init[]; 36 | extern double seq_tsr_spctr_off_k0_init[]; 37 | extern double seq_tsr_spctr_off_k1_init[]; 38 | extern double seq_tsr_spctr_off_k2_init[]; 39 | extern double seq_tsr_spctr_k0_init[]; 40 | extern double seq_tsr_spctr_k1_init[]; 41 | extern double seq_tsr_spctr_k2_init[]; 42 | extern double seq_tsr_spctr_k3_init[]; 43 | extern double seq_tsr_spctr_k4_init[]; 44 | } 45 | 46 | #endif 47 | -------------------------------------------------------------------------------- /bench/model_trainer_kernels.cxx: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace CTF; 3 | 4 | struct grp{ 5 | #ifdef __CUDACC__ 6 | __device__ __host__ 7 | #endif 8 | static double op1(double a, double b){ return b-b/a; }; 9 | #ifdef __CUDACC__ 10 | __device__ __host__ 11 | #endif 12 | static void op2(double a, double & b){ b+=a; }; 13 | static double op2_t2(double a, double b){ return a+b; }; 14 | static void op2_red(double const * a, double * b, int n){ 15 | #pragma omp parallel for 16 | for (int i=0; i mon(0, grp::op2_t2, madd); 29 | for (double sp = .005; sp<.32; sp*=2.){ 30 | Matrix<> A(m, n, dw, mon); 31 | Matrix<> B(m, n, dw, mon); 32 | Matrix<> G(n, n, dw, mon); 33 | Vector<> b(n, dw, mon); 34 | Vector<> c(m, dw, mon); 35 | 36 | srand48(dw.rank); 37 | b.fill_random(-.5, .5); 38 | c.fill_random(-.5, .5); 39 | A.fill_random(-.5, .5); 40 | B.fill_random(-.5, .5); 41 | G.fill_random(-.5, .5); 42 | 43 | Bivar_Kernel k1; 44 | 45 | if (sp > .009){ 46 | if (sp_A) 47 | A.sparsify([=](double a){ return fabs(a)<=.5*sp; }); 48 | if (sp_B){ 49 | G.sparsify([=](double a){ return fabs(a)<=.5*sp; }); 50 | b.sparsify([=](double a){ return fabs(a)<=.5*sp; }); 51 | } 52 | if (sp_C){ 53 | B.sparsify([=](double a){ 
return fabs(a)<=.5*sp; }); 54 | c.sparsify([=](double a){ return fabs(a)<=.5*sp; }); 55 | } 56 | } 57 | 58 | k1(A["ik"],G["kj"],B["ij"]); 59 | k1(A["ij"],b["j"],c["i"]); 60 | 61 | } 62 | } 63 | 64 | -------------------------------------------------------------------------------- /src/symmetry/sym_indices.h: -------------------------------------------------------------------------------- 1 | /** Written by Devin Matthews */ 2 | 3 | #ifndef __INT_SYM_INDICES_H__ 4 | #define __INT_SYM_INDICES_H__ 5 | 6 | #include 7 | 8 | template 9 | int relativeSign(RAIterator s1b, RAIterator s1e, RAIterator s2b, RAIterator s2e) 10 | { 11 | int sz = s1e-s1b; 12 | assert(sz == (int)(s2e-s2b)); 13 | int i, k; 14 | int sign = 1; 15 | std::vector seen(sz); 16 | 17 | for (i = 0;i < sz;i++) seen[i] = false; 18 | 19 | for (i = 0;i < sz;i++) 20 | { 21 | if (seen[i]) continue; 22 | int j = i; 23 | while (true) 24 | { 25 | for (k = 0;k < sz && (!(*(s1b+k) == *(s2b+j)) || seen[k]);k++); 26 | assert(k < sz); 27 | j = k; 28 | seen[j] = true; 29 | if (j == i) break; 30 | sign = -sign; 31 | } 32 | } 33 | 34 | return sign; 35 | } 36 | 37 | template 38 | int relativeSign(const T& s1, const T& s2) 39 | { 40 | return relativeSign(s1.begin(), s1.end(), s2.begin(), s2.end()); 41 | } 42 | 43 | template 44 | int align_symmetric_indices(int order_A, T& idx_A, const int* sym_A, 45 | int order_B, T& idx_B, const int* sym_B); 46 | 47 | template 48 | int align_symmetric_indices(int order_A, T& idx_A, const int* sym_A, 49 | int order_B, T& idx_B, const int* sym_B, 50 | int order_C, T& idx_C, const int* sym_C); 51 | 52 | template 53 | int overcounting_factor(int order_A, const T& idx_A, const int* sym_A, 54 | int order_B, const T& idx_B, const int* sym_B, 55 | int order_C, const T& idx_C, const int* sym_C); 56 | 57 | template 58 | int overcounting_factor(int order_A, const T& idx_A, const int* sym_A, 59 | int order_B, const T& idx_B, const int* sym_B); 60 | 61 | #endif 62 | 63 | -------------------------------------------------------------------------------- /src/interface/group.h: -------------------------------------------------------------------------------- 1 | #ifndef __GROUP_H__ 2 | #define __GROUP_H__ 3 | 4 | #include "../tensor/algstrct.h" 5 | 6 | namespace CTF { 7 | /** 8 | * \addtogroup algstrct 9 | * @{ 10 | **/ 11 | /** 12 | * \brief Group is a Monoid with operator '-' defined 13 | * special case (parent) of a ring 14 | */ 15 | template ()> 16 | class Group : public Monoid { 17 | public: 18 | Group(Group const & other) : Monoid(other) { } 19 | 20 | virtual CTF_int::algstrct * clone() const { 21 | return new Group(*this); 22 | } 23 | 24 | Group() : Monoid() { 25 | this->abs = &CTF_int::char_abs< dtype, CTF_int::default_abs >; 26 | } 27 | 28 | Group(dtype taddid_, 29 | dtype (*fadd_)(dtype a, dtype b), 30 | MPI_Op addmop_) 31 | : Monoid(taddid_, fadd_, addmop_) { 32 | this->abs = &CTF_int::char_abs< dtype, CTF_int::default_abs >; 33 | } 34 | 35 | //treat NULL as mulid 36 | void safeaddinv(char const * a, char *& b) const { 37 | if (a == NULL){ 38 | printf("CTF ERROR: unfortunately additive inverse functionality for groups is currently limited, as it is done for rings via scaling by the inverse of the multiplicative identity, which groups don't have. 
Use the tensor addinv function rather than an indexed expression.\n"); 39 | double * ptr = NULL; 40 | ptr[0]=3.; 41 | assert(0); 42 | } else { 43 | if (b==NULL) b = (char*)malloc(this->el_size); 44 | ((dtype*)b)[0] = -((dtype*)a)[0]; 45 | } 46 | } 47 | 48 | void addinv(char const * a, char * b) const { 49 | ((dtype*)b)[0] = -((dtype*)a)[0]; 50 | } 51 | }; 52 | 53 | /** 54 | * @} 55 | */ 56 | } 57 | #include "semiring.h" 58 | #endif 59 | -------------------------------------------------------------------------------- /src/interface/fun_term.h: -------------------------------------------------------------------------------- 1 | #ifndef __CUST_TERM_H__ 2 | #define __CUST_TERM_H__ 3 | 4 | #include "term.h" 5 | 6 | namespace CTF_int { 7 | class univar_function; 8 | class bivar_function; 9 | } 10 | 11 | namespace CTF_int { 12 | class Unifun_Term : public Term{ 13 | public: 14 | Term * A; 15 | univar_function const * func; 16 | 17 | Unifun_Term(Term * A, 18 | univar_function const * func); 19 | 20 | Unifun_Term(Unifun_Term const & other, 21 | std::map* remap=NULL); 22 | 23 | ~Unifun_Term(); 24 | 25 | Term * clone(std::map* remap = NULL) const; 26 | 27 | void execute(CTF::Idx_Tensor output) const; 28 | 29 | CTF::Idx_Tensor execute() const; 30 | 31 | CTF::Idx_Tensor estimate_time(double & cost) const; 32 | 33 | double estimate_time(CTF::Idx_Tensor output) const; 34 | 35 | void get_inputs(std::set* inputs_set) const; 36 | 37 | CTF::World * where_am_i() const; 38 | }; 39 | 40 | class Bifun_Term : public Term { 41 | public: 42 | Term * A; 43 | Term * B; 44 | bivar_function const * func; 45 | 46 | Bifun_Term(Term * A, 47 | Term * B, 48 | bivar_function const * func); 49 | 50 | Bifun_Term(Bifun_Term const & other, 51 | std::map* remap=NULL); 52 | 53 | ~Bifun_Term(); 54 | 55 | Term * clone(std::map* remap = NULL) const; 56 | 57 | void execute(CTF::Idx_Tensor output) const; 58 | 59 | CTF::Idx_Tensor execute() const; 60 | 61 | CTF::Idx_Tensor estimate_time(double & cost) const; 62 | 63 | double estimate_time(CTF::Idx_Tensor output) const; 64 | 65 | void get_inputs(std::set* inputs_set) const; 66 | 67 | CTF::World * where_am_i() const; 68 | }; 69 | 70 | } 71 | 72 | #endif 73 | -------------------------------------------------------------------------------- /src/interface/partition.cxx: -------------------------------------------------------------------------------- 1 | #include "partition.h" 2 | #include "../shared/util.h" 3 | 4 | namespace CTF { 5 | Partition::Partition(int order_, int const * lens_){ 6 | order = order_; 7 | lens = (int*)CTF_int::alloc(order*sizeof(int)); 8 | memcpy(lens, lens_, order*sizeof(int)); 9 | } 10 | 11 | Partition::Partition(){ 12 | order = 0; 13 | lens = NULL; 14 | } 15 | 16 | Partition::~Partition(){ 17 | CTF_int::cdealloc(lens); 18 | } 19 | 20 | Partition::Partition(Partition const & other){ 21 | order = other.order; 22 | lens = (int*)CTF_int::alloc(order*sizeof(int)); 23 | memcpy(lens, other.lens, order*sizeof(int)); 24 | } 25 | 26 | void Partition::operator=(Partition const & other){ 27 | order = other.order; 28 | lens = (int*)CTF_int::alloc(order*sizeof(int)); 29 | memcpy(lens, other.lens, order*sizeof(int)); 30 | } 31 | 32 | 33 | Idx_Partition Partition::operator[](char const * idx){ 34 | return Idx_Partition(*this, idx); 35 | } 36 | 37 | Idx_Partition::Idx_Partition(){ 38 | part = Partition(0, NULL); 39 | idx = NULL; 40 | } 41 | 42 | Idx_Partition::Idx_Partition(Partition const & part_, char const * idx_){ 43 | part = part_; 44 | idx = 
(char*)malloc(part.order*sizeof(char)); 45 | memcpy(idx, idx_, part.order*sizeof(char)); 46 | } 47 | 48 | Idx_Partition::~Idx_Partition(){ 49 | if (idx != NULL){ 50 | free(idx); 51 | idx = NULL; 52 | } 53 | } 54 | 55 | Idx_Partition Idx_Partition::reduce_order() const { 56 | int * new_lens = (int*)malloc(part.order*sizeof(int)); 57 | int new_order = 0; 58 | char * new_idx = (char*)malloc(part.order); 59 | for (int i=0; i 9 | 10 | using namespace CTF; 11 | 12 | int ccsdt_map_test(int n, 13 | World &dw){ 14 | 15 | int rank, num_pes; 16 | 17 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 18 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 19 | 20 | //int shapeAS6[] = {AS,AS,NS,AS,AS,NS}; 21 | int shapeNS6[] = {NS,NS,NS,NS,NS,NS}; 22 | int nnnnnn[] = {n,n,n,n,n,n}; 23 | int shapeNS4[] = {NS,NS,NS,NS}; 24 | int nnnn[] = {n,n,n,n}; 25 | 26 | //* Creates distributed tensors initialized with zeros 27 | Tensor<> W(4, nnnn, shapeNS4, dw, "W", 1); 28 | Tensor<> T(4, nnnn, shapeNS4, dw, "T", 1); 29 | Tensor<> Z(6, nnnnnn, shapeNS6, dw, "Z", 1); 30 | 31 | Z["hijmno"] += W["hijk"]*T["kmno"]; 32 | 33 | return 1; 34 | } 35 | 36 | char* getCmdOption(char ** begin, 37 | char ** end, 38 | const std::string & option){ 39 | char ** itr = std::find(begin, end, option); 40 | if (itr != end && ++itr != end){ 41 | return *itr; 42 | } 43 | return 0; 44 | } 45 | 46 | 47 | int main(int argc, char ** argv){ 48 | int rank, np, niter, n; 49 | int const in_num = argc; 50 | char ** input_str = argv; 51 | 52 | MPI_Init(&argc, &argv); 53 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 54 | MPI_Comm_size(MPI_COMM_WORLD, &np); 55 | 56 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 57 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 58 | if (n < 0) n = 4; 59 | } else n = 4; 60 | 61 | if (getCmdOption(input_str, input_str+in_num, "-niter")){ 62 | niter = atoi(getCmdOption(input_str, input_str+in_num, "-niter")); 63 | if (niter < 0) niter = 3; 64 | } else niter = 3; 65 | 66 | 67 | 68 | { 69 | World dw(argc, argv); 70 | int pass = ccsdt_map_test(n, dw); 71 | assert(pass); 72 | } 73 | 74 | 75 | MPI_Finalize(); 76 | return 0; 77 | } 78 | /** 79 | * @} 80 | * @} 81 | */ 82 | 83 | 84 | -------------------------------------------------------------------------------- /src/interface/monoid.cxx: -------------------------------------------------------------------------------- 1 | #include "../sparse_formats/csr.h" 2 | #include "set.h" 3 | #include "../shared/blas_symbs.h" 4 | #include "../shared/mkl_symbs.h" 5 | #include "../shared/util.h" 6 | using namespace CTF_int; 7 | namespace CTF { 8 | /* template <> 9 | void Monoid::csr_add(int64_t m, int64_t n, char const * a, int const * ja, int const * ia, char const * b, int const * jb, int const * ib, char *& c, int *& jc, int *& ic){ 10 | if (fadd != default_add){ 11 | printf("CTF error: support for CSR addition for this type unavailable\n"); 12 | assert(0); 13 | } 14 | alloc(sizeof(int)*(m+1), (void**)&ic); 15 | bool tA = 'N'; 16 | bool tB = 'N'; 17 | int job = 1; 18 | int sort = 1; 19 | float mlid = 1.0; 20 | int info; 21 | MKL_SCSRADD(tA, tB, &job, &sort, m, n, (float*)a, ja, ia, &mlid, (float*)b, jb, ib, NULL, NULL, ic, NULL, &info); 22 | alloc(sizeof(int)*ic[m], (void**)&jc); 23 | alloc(sizeof(float)*ic[m], (void**)&c); 24 | int job = 2; 25 | MKL_SCSRADD(tA, tB, &job, &sort, m, n, (float*)a, ja, ia, &mlid, (float*)b, jb, ib, (float*)c, jc, ic, NULL, &info); 26 | }*/ 27 | 28 | template <> 29 | char * CTF::Monoid::csr_add(char * cA, char * cB) const { 30 | #if USE_MKL 31 | 
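// MKL path: mkl_dcsradd is called twice, first (job=1) to compute the output row pointers and size the result, then (job=2) to fill its column indices and values; without MKL the #else branch falls back to the generic CTF_int::algstrct::csr_add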
TAU_FSTART(mkl_csr_add) 32 | if (fadd != &default_add){ 33 | return CTF_int::algstrct::csr_add(cA, cB); 34 | } 35 | CSR_Matrix A(cA); 36 | CSR_Matrix B(cB); 37 | int * ic; 38 | int m = A.nrow(); 39 | int n = A.ncol(); 40 | alloc_ptr(sizeof(int)*(m+1), (void**)&ic); 41 | char tA = 'N'; 42 | int job = 1; 43 | int sort = 1; 44 | double mlid = 1.0; 45 | int info; 46 | CTF_BLAS::MKL_DCSRADD(&tA, &job, &sort, &m, &n, (double*)A.vals(), A.JA(), A.IA(), &mlid, (double*)B.vals(), B.JA(), B.IA(), NULL, NULL, ic, NULL, &info); 47 | CSR_Matrix C(ic[m]-1, m, n, this); 48 | memcpy(C.IA(), ic, sizeof(int)*(m+1)); 49 | cdealloc(ic); 50 | job = 2; 51 | CTF_BLAS::MKL_DCSRADD(&tA, &job, &sort, &m, &n, (double*)A.vals(), A.JA(), A.IA(), &mlid, (double*)B.vals(), B.JA(), B.IA(), (double*)C.vals(), C.JA(), C.IA(), NULL, &info); 52 | TAU_FSTOP(mkl_csr_add) 53 | return C.all_data; 54 | #else 55 | return CTF_int::algstrct::csr_add(cA, cB); 56 | #endif 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/scaling/sym_seq_scl.h: -------------------------------------------------------------------------------- 1 | #ifndef __SYM_SEQ_SCL_H__ 2 | #define __SYM_SEQ_SCL_H__ 3 | 4 | #include "../tensor/algstrct.h" 5 | #include "../interface/term.h" 6 | 7 | namespace CTF_int { 8 | 9 | /** 10 | * \brief untyped internal class for singly-typed single variable function (Endomorphism) 11 | */ 12 | class endomorphism { 13 | public: 14 | /** 15 | * \brief apply function f to value stored at a 16 | * \param[in,out] a pointer to operand that will be cast to type by extending class 17 | * return result of applying f on value at a 18 | */ 19 | virtual void apply_f(char * a) const { assert(0); } 20 | 21 | /** 22 | * \brief apply f to A 23 | * \param[in] A operand tensor with pre-defined indices 24 | */ 25 | void operator()(Term const & A) const; 26 | 27 | virtual ~endomorphism(){} 28 | }; 29 | 30 | /** 31 | * \brief performs symmetric scaling using custom func 32 | */ 33 | int sym_seq_scl_cust(char const * alpha, 34 | char * A, 35 | algstrct const * sr_A, 36 | int const order_A, 37 | int const * edge_len_A, 38 | int const * sym_A, 39 | int const * idx_map_A, 40 | endomorphism const * func); 41 | /** 42 | * \brief performs symmetric scaling using algstrct const * sr_A 43 | */ 44 | int sym_seq_scl_ref(char const * alpha, 45 | char * A, 46 | algstrct const * sr_A, 47 | int order_A, 48 | int const * edge_len_A, 49 | int const * sym_A, 50 | int const * idx_map_A); 51 | /** 52 | * \brief invert index map 53 | * \param[in] order_A number of dimensions of A 54 | * \param[in] idx_A index map of A 55 | * \param[out] order_tot number of total dimensions 56 | * \param[out] idx_arr 2*ndim_tot index array 57 | */ 58 | void inv_idx(int const order_A, 59 | int const * idx_A, 60 | int * order_tot, 61 | int ** idx_arr); 62 | } 63 | #endif 64 | -------------------------------------------------------------------------------- /src/scaling/scale_tsr.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #ifndef __SCL_TSR_H__ 4 | #define __SCL_TSR_H__ 5 | 6 | #include "../tensor/algstrct.h" 7 | #include "sym_seq_scl.h" 8 | 9 | namespace CTF_int { 10 | 11 | 12 | class scl { 13 | public: 14 | char * A; 15 | algstrct const * sr_A; 16 | char const * alpha; 17 | void * buffer; 18 | 19 | virtual void run() {}; 20 | virtual int64_t mem_fp() { return 0; }; 21 | virtual scl * clone() { return NULL; }; 22 | 23 | 
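// base-class destructor releases the scratch buffer if one was allocated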
virtual ~scl(){ if (buffer != NULL) CTF_int::cdealloc(buffer); } 24 | scl(scl * other); 25 | scl(){ buffer = NULL; } 26 | }; 27 | 28 | class scl_virt : public scl { 29 | public: 30 | /* Class to be called on sub-blocks */ 31 | scl * rec_scl; 32 | 33 | int num_dim; 34 | int * virt_dim; 35 | int order_A; 36 | int64_t blk_sz_A; 37 | int const * idx_map_A; 38 | 39 | void run(); 40 | int64_t mem_fp(); 41 | scl * clone(); 42 | 43 | scl_virt(scl * other); 44 | ~scl_virt(); 45 | scl_virt(){} 46 | }; 47 | 48 | class seq_tsr_scl : public scl { 49 | public: 50 | int order; 51 | int * edge_len; 52 | int const * idx_map; 53 | int const * sym; 54 | //fseq_tsr_scl func_ptr; 55 | 56 | int is_custom; 57 | endomorphism const * func; //fseq_elm_scl custom_params; 58 | 59 | void run(); 60 | void print(); 61 | int64_t mem_fp(); 62 | scl * clone(); 63 | 64 | /** 65 | * \brief copies scl object 66 | * \param[in] other object to copy 67 | */ 68 | seq_tsr_scl(scl * other); 69 | ~seq_tsr_scl(){ CTF_int::cdealloc(edge_len); }; 70 | seq_tsr_scl(){} 71 | }; 72 | 73 | /** 74 | * \brief invert index map 75 | * \param[in] order_A number of dimensions of A 76 | * \param[in] idx_A index map of A 77 | * \param[in] order_B number of dimensions of B 78 | * \param[in] idx_B index map of B 79 | * \param[out] order_tot number of total dimensions 80 | * \param[out] idx_arr 2*order_tot index array 81 | */ 82 | void inv_idx(int order_A, 83 | int const * idx_A, 84 | int * order_tot, 85 | int ** idx_arr); 86 | 87 | } 88 | #endif // __SCL_TSR_H__ 89 | -------------------------------------------------------------------------------- /examples/spmv.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup examples 2 | * @{ 3 | * \defgroup spmv spmv 4 | * @{ 5 | * \brief Multiplication of a random square sparse matrix by a vector 6 | */ 7 | 8 | #include 9 | using namespace CTF; 10 | 11 | int spmv(int n, 12 | World & dw){ 13 | 14 | Matrix<> spA(n, n, SP, dw); 15 | Matrix<> dnA(n, n, dw); 16 | Vector<> b(n, dw); 17 | Vector<> c1(n, dw); 18 | Vector<> c2(n, dw); 19 | 20 | srand48(dw.rank); 21 | b.fill_random(0.0,1.0); 22 | c1.fill_random(0.0,1.0); 23 | dnA.fill_random(0.0,1.0); 24 | 25 | spA["ij"] += dnA["ij"]; 26 | spA.sparsify(.5); 27 | dnA["ij"] = 0.0; 28 | dnA["ij"] += spA["ij"]; 29 | 30 | c2["i"] = c1["i"]; 31 | 32 | c1["i"] += dnA["ij"]*b["j"]; 33 | 34 | c2["i"] += .5*spA["ij"]*b["j"]; 35 | c2["i"] += .5*b["j"]*spA["ij"]; 36 | 37 | 38 | bool pass = c2.norm2() >= 1E-6; 39 | 40 | c2["i"] -= c1["i"]; 41 | 42 | if (pass) pass = c2.norm2() <= 1.E-6; 43 | 44 | if (dw.rank == 0){ 45 | if (pass) 46 | printf("{ c[\"i\"] += A[\"ij\"]*b[\"j\"] with sparse, A } passed \n"); 47 | else 48 | printf("{ c[\"i\"] += A[\"ij\"]*b[\"j\"] with sparse, A } failed \n"); 49 | } 50 | return pass; 51 | } 52 | 53 | 54 | #ifndef TEST_SUITE 55 | char* getCmdOption(char ** begin, 56 | char ** end, 57 | const std::string & option){ 58 | char ** itr = std::find(begin, end, option); 59 | if (itr != end && ++itr != end){ 60 | return *itr; 61 | } 62 | return 0; 63 | } 64 | 65 | 66 | int main(int argc, char ** argv){ 67 | int rank, np, n, pass; 68 | int const in_num = argc; 69 | char ** input_str = argv; 70 | 71 | MPI_Init(&argc, &argv); 72 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 73 | MPI_Comm_size(MPI_COMM_WORLD, &np); 74 | 75 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 76 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 77 | if (n < 0) n = 7; 78 | } else n = 7; 79 | 80 | 81 | { 82 | World dw(argc, 
argv); 83 | 84 | if (rank == 0){ 85 | printf("Multiplying %d-by-%d sparse matrix by vector\n",n,n); 86 | } 87 | pass = spmv(n, dw); 88 | assert(pass); 89 | } 90 | 91 | MPI_Finalize(); 92 | return 0; 93 | } 94 | /** 95 | * @} 96 | * @} 97 | */ 98 | 99 | #endif 100 | -------------------------------------------------------------------------------- /src/mapping/distribution.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #ifndef __INT_DISTRIBUTION_H__ 4 | #define __INT_DISTRIBUTION_H__ 5 | 6 | #include "mapping.h" 7 | 8 | namespace CTF_int { 9 | 10 | class tensor; 11 | 12 | inline 13 | int get_distribution_size(int order){ 14 | return sizeof(int)*2 + sizeof(int64_t) + order*sizeof(int)*7; 15 | } 16 | 17 | // \brief data distribution object used for redistribution 18 | class distribution { 19 | public: 20 | int order; 21 | int * phase; 22 | int * virt_phase; 23 | int * phys_phase; 24 | int * pe_lda; 25 | int * pad_edge_len; 26 | int * padding; 27 | int * perank; 28 | int is_cyclic; 29 | int64_t size; 30 | 31 | distribution(); 32 | ~distribution(); 33 | 34 | /** 35 | * \brief create distribution object which defines a tensor's data decomposition 36 | * \param[in] tsr tensor whose distribution to record 37 | */ 38 | distribution(tensor const * tsr); 39 | 40 | /** 41 | * \brief create distribution object by deserializing buffer 42 | * \param[in] buffer serialized distribution data 43 | */ 44 | distribution(char const * buffer); 45 | 46 | /** 47 | * \brief serialize object into contiguous data buffer 48 | \param[out] buffer unallocated array into which to serialize 49 | * \param[out] size length of serialized array 50 | */ 51 | void serialize(char ** buffer, int * size); 52 | private: 53 | void free_data(); 54 | }; 55 | 56 | /** 57 | * \brief calculate the block-sizes of a tensor 58 | * \param[in] order number of dimensions of this tensor 59 | * \param[in] size is the size of the local tensor stored 60 | * \param[in] edge_len edge lengths of global tensor 61 | * \param[in] edge_map mapping of each dimension 62 | * \param[out] vrt_sz size of virtual block 63 | * \param[out] vrt_edge_len edge lengths of virtual block 64 | * \param[out] blk_edge_len edge lengths of local block 65 | */ 66 | void calc_dim(int order, 67 | int64_t size, 68 | int const * edge_len, 69 | mapping const * edge_map, 70 | int64_t * vrt_sz, 71 | int * vrt_edge_len, 72 | int * blk_edge_len); 73 | } 74 | 75 | #endif 76 | -------------------------------------------------------------------------------- /test/reduce_bcast.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup reduce_bcast reduce_bcast 6 | * @{ 7 | * \brief Summation along tensor diagonals 8 | */ 9 | 10 | #include 11 | 12 | using namespace CTF; 13 | 14 | int reduce_bcast(int n, 15 | World & dw){ 16 | int pass; 17 | 18 | Matrix<> A(n,n,dw); 19 | Matrix<> B(n,1,dw); 20 | Matrix<> C(n,n,dw); 21 | Matrix<> C2(n,n,dw); 22 | Vector<> d(n,dw); 23 | 24 | srand48(13*dw.rank); 25 | 26 | A.fill_random(0.,1.); 27 | B.fill_random(0.,1.); 28 | C.fill_random(0.,1.); 29 | C2["ij"] = C["ij"]; 30 | d.fill_random(0.,1.); 31 | 32 | C["ij"] += B["ik"]; 33 | 34 | d["i"] = B["ij"]; 35 | 36 | C2["ij"] += d["i"]; 37 | 38 | C["ij"] -= C2["ij"]; 39 | 40 | pass = true; 41 | if (C.norm2() > 1.E-6){ 42 | pass = false; 43 | if (dw.rank == 0) 
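
The `get_distribution_size(order)` helper in distribution.h above hard-codes the byte count of a serialized `distribution`; the count is easier to audit with each term named. The field-to-term mapping below is an inference from the member list, not taken from the serializer itself:

```cpp
#include <cstdint>
#include <cstddef>

// Bytes of a serialized CTF_int::distribution, matching
//   get_distribution_size(order) = sizeof(int)*2 + sizeof(int64_t) + order*sizeof(int)*7.
inline size_t distribution_bytes(int order){
  size_t scalar_ints    = 2 * sizeof(int);                  // order, is_cyclic
  size_t scalar_int64   = sizeof(int64_t);                  // size (local element count)
  size_t per_dim_arrays = 7 * (size_t)order * sizeof(int);  // phase, virt_phase, phys_phase,
                                                            // pe_lda, pad_edge_len, padding, perank
  return scalar_ints + scalar_int64 + per_dim_arrays;
}
```
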
44 | printf("{ (A[\"ij\"]+=B[\"ik\"] with square B } failed \n"); 45 | return pass; 46 | } 47 | 48 | C["ij"] = C2["ij"]; 49 | 50 | C["ij"] += B["ik"]; 51 | 52 | d["i"] = B["ik"]; 53 | 54 | C2["ij"] += d["i"]; 55 | 56 | C["ij"] -= C2["ij"]; 57 | 58 | if (C.norm2() > 1.E-6) 59 | pass = false; 60 | 61 | if (pass){ 62 | if (dw.rank == 0) 63 | printf("{ (A[\"ij\"]+=B[\"ik\"] } passed \n"); 64 | } else { 65 | if (dw.rank == 0) 66 | printf("{ (A[\"ij\"]+=B[\"ik\"] with column vector B } failed \n"); 67 | } 68 | return pass; 69 | } 70 | 71 | 72 | #ifndef TEST_SUITE 73 | char* getCmdOption(char ** begin, 74 | char ** end, 75 | const std::string & option){ 76 | char ** itr = std::find(begin, end, option); 77 | if (itr != end && ++itr != end){ 78 | return *itr; 79 | } 80 | return 0; 81 | } 82 | 83 | 84 | int main(int argc, char ** argv){ 85 | int rank, np, n; 86 | int in_num = argc; 87 | char ** input_str = argv; 88 | 89 | MPI_Init(&argc, &argv); 90 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 91 | MPI_Comm_size(MPI_COMM_WORLD, &np); 92 | 93 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 94 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 95 | if (n < 0) n = 7; 96 | } else n = 7; 97 | 98 | 99 | { 100 | World dw(argc, argv); 101 | reduce_bcast(n, dw); 102 | } 103 | 104 | MPI_Finalize(); 105 | return 0; 106 | } 107 | /** 108 | * @} 109 | * @} 110 | */ 111 | 112 | #endif 113 | -------------------------------------------------------------------------------- /src/scaling/scaling.h: -------------------------------------------------------------------------------- 1 | #ifndef __INT_SCALING_H__ 2 | #define __INT_SCALING_H__ 3 | 4 | #include "../interface/common.h" 5 | #include "sym_seq_scl.h" 6 | 7 | namespace CTF_int { 8 | class tensor; 9 | class endomorphism; 10 | 11 | /** 12 | * \brief class for execution distributed scaling of a tensor 13 | */ 14 | class scaling { 15 | public: 16 | /** \brief operand/output */ 17 | tensor * A; 18 | 19 | /** \brief scaling of A */ 20 | char const * alpha; 21 | 22 | /** \brief indices of A */ 23 | int * idx_map; 24 | 25 | /** \brief whether there is a elementwise custom function */ 26 | bool is_custom; 27 | 28 | /** \brief function to execute on elementwise elements */ 29 | endomorphism const * func; 30 | 31 | /** 32 | * \brief constructor definining contraction with C's mul and add ops 33 | * \param[in] A left operand tensor 34 | * \param[in] idx_map indices of left operand 35 | * \param[in] alpha scaling factor alpha * A[idx_map]; 36 | A[idx_map] = alpha * A[idx_map] 37 | */ 38 | scaling(tensor * A, 39 | int const * idx_map, 40 | char const * alpha); 41 | scaling(tensor * A, 42 | char const * idx_map, 43 | char const * alpha); 44 | 45 | /** 46 | * \brief constructor definining scaling with custom function 47 | * \param[in] A left operand tensor 48 | * \param[in] idx_map indices of left operand 49 | func(&A[idx_map]) 50 | * \param[in] alpha scaling factor alpha * A[idx_map]; 51 | A[idx_map] = alpha * func(A[idx_map]) 52 | * \param[in] func elementwise function 53 | */ 54 | scaling(tensor * A, 55 | int const * idx_map, 56 | char const * alpha, 57 | endomorphism const * func); 58 | scaling(tensor * A, 59 | char const * idx_map, 60 | char const * alpha, 61 | endomorphism const * func); 62 | 63 | /** \brief destructor */ 64 | ~scaling(); 65 | 66 | /** \brief run scaling \return whether success or error */ 67 | int execute(); 68 | 69 | /** \brief predicts execution time in seconds using performance models */ 70 | double estimate_time(); 71 | 72 | /** 73 | * 
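
The reduce_bcast test above leans on the summation convention of CTF index notation: an index that appears only on the right-hand side is summed over, and an index that appears only on the left-hand side is broadcast. A condensed sketch of the two idioms the test compares (illustrative, not a file from the repository):

```cpp
#include <ctf.hpp>
using namespace CTF;

// C_ij += B_ik sums over k and broadcasts the row sums along j, which is the
// same as first reducing into d_i = sum_j B_ij and then broadcasting d_i.
void reduce_then_broadcast(int n, World & dw){
  Matrix<> B(n, n, dw);
  Matrix<> C(n, n, dw);   // starts out zero
  Vector<> d(n, dw);
  B.fill_random(0., 1.);

  C["ij"] += B["ik"];     // k appears only on the RHS: summed away
  d["i"]   = B["ij"];     // j appears only on the RHS: row sums of B
  C["ij"] -= d["i"];      // d_i broadcast along j; C returns to ~0
}
```
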
\brief scales a sparse tensor 74 | */ 75 | void sp_scl(); 76 | }; 77 | 78 | } 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /src/contraction/spctr_comm.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #ifndef __SPCTR_COMM_H__ 4 | #define __SPCTR_COMM_H__ 5 | 6 | #include "spctr_tsr.h" 7 | 8 | namespace CTF_int{ 9 | class contraction; 10 | 11 | class spctr_replicate : public spctr { 12 | public: 13 | int ncdt_A; /* number of processor dimensions to replicate A along */ 14 | int ncdt_B; /* number of processor dimensions to replicate B along */ 15 | int ncdt_C; /* number of processor dimensions to replicate C along */ 16 | int64_t size_A; /* size of A blocks */ 17 | int64_t size_B; /* size of B blocks */ 18 | int64_t size_C; /* size of C blocks */ 19 | 20 | CommData ** cdt_A; 21 | CommData ** cdt_B; 22 | CommData ** cdt_C; 23 | /* Class to be called on sub-blocks */ 24 | spctr * rec_ctr; 25 | /* void set_size_blk_A(int new_nblk_A, int64_t const * nnbA){ 26 | spctr::set_size_blk_A(new_nblk_A, nnbA); 27 | rec_ctr->set_size_blk_A(new_nblk_A, nnbA); 28 | }*/ 29 | 30 | void run(char * A, int nblk_A, int64_t const * size_blk_A, 31 | char * B, int nblk_B, int64_t const * size_blk_B, 32 | char * C, int nblk_C, int64_t * size_blk_C, 33 | char *& new_C); 34 | /** 35 | * \brief returns the number of bytes of buffer space 36 | * we need 37 | * \return bytes needed 38 | */ 39 | /** 40 | * \brief returns the number of bytes need by each processor in this kernel 41 | * \return bytes needed for contraction 42 | */ 43 | int64_t spmem_fp(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 44 | /** 45 | * \brief returns the number of bytes need by each processor in this kernel and its recursive calls 46 | * \return bytes needed for recursive contraction 47 | */ 48 | int64_t spmem_rec(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 49 | double est_time_fp(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 50 | double est_time_rec(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 51 | void print(); 52 | spctr * clone(); 53 | 54 | spctr_replicate(spctr * other); 55 | ~spctr_replicate(); 56 | spctr_replicate(contraction const * c, 57 | int const * phys_mapped, 58 | int64_t blk_sz_A, 59 | int64_t blk_sz_B, 60 | int64_t blk_sz_C); 61 | }; 62 | 63 | } 64 | #endif // __CTR_COMM_H__ 65 | -------------------------------------------------------------------------------- /test/python/test_dot.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import unittest 4 | import numpy 5 | import ctf 6 | import os 7 | import sys 8 | 9 | 10 | def allclose(a, b): 11 | return abs(ctf.to_nparray(a) - ctf.to_nparray(b)).sum() < 1e-14 12 | 13 | class KnowValues(unittest.TestCase): 14 | def test_dot_1d(self): 15 | a1 = numpy.ones(4) 16 | self.assertTrue(allclose(ctf.dot(ctf.astensor(a1), a1), numpy.dot(a1, a1))) 17 | self.assertTrue(allclose(ctf.dot(a1+1j, ctf.astensor(a1)), numpy.dot(a1+1j, a1))) 18 | a2 = ctf.astensor(a1).dot(a1+0j) 19 | self.assertTrue(a2.dtype == numpy.complex128) 20 | #self.assertTrue(ctf.astensor(a1).dot(a1+0j).dtype == numpy.complex) 21 | 22 | def test_dot_2d(self): 23 | a1 = numpy.random.random(4) 24 | a2 = numpy.random.random((4,3)) 25 | self.assertTrue(ctf.dot(ctf.astensor(a1), ctf.astensor(a2)).shape == (3,)) 26 | 
self.assertTrue(allclose(ctf.dot(a1, ctf.astensor(a2)), numpy.dot(a1, a2))) 27 | self.assertTrue(ctf.dot(ctf.astensor(a2).T(), a1).shape == (3,)) 28 | self.assertTrue(allclose(ctf.dot(ctf.astensor(a2).T(), a1), numpy.dot(a2.T, a1))) 29 | 30 | with self.assertRaises(ValueError): 31 | ctf.dot(a2, a2) 32 | self.assertTrue(allclose(ctf.dot(ctf.astensor(a2).T(), a2), numpy.dot(a2.T, a2))) 33 | self.assertTrue(allclose(ctf.astensor(a2).dot(a2.T), a2.dot(a2.T))) 34 | 35 | def test_tensordot(self): 36 | a0 = numpy.random.random((2,2,2)) 37 | self.assertTrue(allclose(ctf.tensordot(a0, a0), numpy.tensordot(a0, a0))) 38 | self.assertTrue(allclose(ctf.tensordot(a0, a0, 1), numpy.tensordot(a0, a0, 1))) 39 | self.assertTrue(allclose(ctf.tensordot(a0, a0, [[1,0],[1,0]]), numpy.tensordot(a0, a0, [[1,0],[1,0]]))) 40 | self.assertTrue(allclose(ctf.tensordot(a0, a0, [[0,1],[1,0]]), numpy.tensordot(a0, a0, [[0,1],[1,0]]))) 41 | self.assertTrue(allclose(ctf.tensordot(a0, a0, [[2,1,0],[1,0,2]]), numpy.tensordot(a0, a0, [[2,1,0],[1,0,2]]))) 42 | with self.assertRaises(IndexError): 43 | ctf.tensordot(a0, a0, [[2,1,0,3],[0,1,2,3]]) 44 | 45 | 46 | if __name__ == "__main__": 47 | numpy.random.seed(5330); 48 | if ctf.comm().rank() != 0: 49 | result = unittest.TextTestRunner(stream = open(os.devnull, 'w')).run(unittest.TestSuite(unittest.TestLoader().loadTestsFromTestCase(KnowValues))) 50 | else: 51 | print("Tests for dot") 52 | result = unittest.TextTestRunner().run(unittest.TestSuite(unittest.TestLoader().loadTestsFromTestCase(KnowValues))) 53 | ctf.MPI_Stop() 54 | sys.exit(not result) 55 | -------------------------------------------------------------------------------- /src/interface/vector.cxx: -------------------------------------------------------------------------------- 1 | #include "common.h" 2 | 3 | namespace CTF_int { 4 | 5 | struct int1 6 | { 7 | int i[1]; 8 | int1(int a) 9 | { 10 | i[0] = a; 11 | } 12 | operator const int*() const 13 | { 14 | return i; 15 | } 16 | }; 17 | } 18 | namespace CTF { 19 | template 20 | Vector::Vector() : Tensor() { } 21 | 22 | template 23 | Vector::Vector(Vector const & A) 24 | : Tensor(A) { 25 | len = A.len; 26 | } 27 | 28 | template 29 | Vector::Vector(Tensor const & A) 30 | : Tensor(A) { 31 | IASSERT(A.order == 1); 32 | len = A.lens[0]; 33 | } 34 | 35 | template 36 | Vector::Vector(int len_, 37 | World & world_, 38 | CTF_int::algstrct const & sr_) 39 | : Tensor(1, CTF_int::int1(len_), CTF_int::int1(NS), world_, sr_, NULL, 0) { 40 | len = len_; 41 | } 42 | 43 | template 44 | Vector::Vector(int len_, 45 | World & world_, 46 | char const * name_, 47 | int profile_, 48 | CTF_int::algstrct const & sr_) 49 | : Tensor(1, CTF_int::int1(len_), CTF_int::int1(NS), world_, sr_, name_, profile_) { 50 | len = len_; 51 | } 52 | 53 | template 54 | Vector::Vector(int len_, 55 | int atr_, 56 | World & world_, 57 | char const * name_, 58 | int profile_, 59 | CTF_int::algstrct const & sr_) 60 | : Tensor(1, atr_>0, CTF_int::int1(len_), CTF_int::int1(NS), world_, sr_, name_, profile_) { 61 | len = len_; 62 | } 63 | 64 | template 65 | Vector::Vector(int len_, 66 | int atr_, 67 | World & world_, 68 | CTF_int::algstrct const & sr_) 69 | : Tensor(1, atr_>0, CTF_int::int1(len_), CTF_int::int1(NS), world_, sr_) { 70 | len = len_; 71 | } 72 | 73 | //template 74 | //Vector & Vector::operator=(const Vector & A){ 75 | // CTF_int::tensor::free_self(); 76 | // CTF_int::tensor::init(A.sr, A.order, A.lens, A.sym, A.wrld, 1, A.name, A.profile, A.is_sparse); 77 | // return *this; 78 | //} 79 | 80 | 81 | 82 | } 
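
Every `Vector` constructor in vector.cxx above forwards to the order-1 `Tensor` constructor, with the small `int1` wrapper turning a scalar length into the `int const *` array the base class expects. A usage sketch of that equivalence (the explicit `Tensor` form restates the forwarding shown above; illustrative only):

```cpp
#include <ctf.hpp>
using namespace CTF;

void make_vectors(int n, World & dw){
  // The convenience form...
  Vector<> v(n, dw);

  // ...builds the same object as an order-1 nonsymmetric Tensor:
  // CTF_int::int1(n) decays to an int[1] holding n, int1(NS) to an int[1] holding NS.
  int lens[1] = {n};
  int sym[1]  = {NS};
  Tensor<> t(1, lens, sym, dw);

  // Both support the usual index notation.
  v["i"]  = 1.0;
  t["i"] += v["i"];
}
```
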
83 | -------------------------------------------------------------------------------- /test/diag_ctr.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup diag_ctr diag_ctr 6 | * @{ 7 | * \brief Summation along tensor diagonals 8 | */ 9 | #include 10 | 11 | using namespace CTF; 12 | 13 | int diag_ctr(int n, 14 | int m, 15 | World & dw){ 16 | int rank, i, num_pes, pass; 17 | int64_t np; 18 | double * pairs; 19 | int64_t * indices; 20 | 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 22 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 23 | 24 | 25 | int shapeN4[] = {NS,NS,NS,NS}; 26 | int sizeN4[] = {n,m,n,m}; 27 | 28 | //* Creates distributed tensors initialized with zeros 29 | Tensor<> A(4, sizeN4, shapeN4, dw); 30 | 31 | srand48(13*rank); 32 | 33 | Matrix<> mA(n,m,NS,dw); 34 | Matrix<> mB(n,m,NS,dw); 35 | A.get_local_data(&np, &indices, &pairs); 36 | for (i=0; i 1.E-10) 50 | pass = 0; 51 | if (pass){ 52 | if (rank == 0) 53 | printf("{ sum(ai)A[\"aiai\"]=sum(ai)mA[\"ai\"] } passed \n"); 54 | } else { 55 | if (rank == 0) 56 | printf("{ sum(ai)A[\"aiai\"]=sum(ai)mA[\"ai\"] } failed \n"); 57 | } 58 | 59 | 60 | return pass; 61 | } 62 | 63 | 64 | #ifndef TEST_SUITE 65 | char* getCmdOption(char ** begin, 66 | char ** end, 67 | const std::string & option){ 68 | char ** itr = std::find(begin, end, option); 69 | if (itr != end && ++itr != end){ 70 | return *itr; 71 | } 72 | return 0; 73 | } 74 | 75 | 76 | int main(int argc, char ** argv){ 77 | int rank, np, n, m; 78 | int in_num = argc; 79 | char ** input_str = argv; 80 | 81 | MPI_Init(&argc, &argv); 82 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 83 | MPI_Comm_size(MPI_COMM_WORLD, &np); 84 | 85 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 86 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 87 | if (n < 0) n = 7; 88 | } else n = 7; 89 | 90 | if (getCmdOption(input_str, input_str+in_num, "-m")){ 91 | m = atoi(getCmdOption(input_str, input_str+in_num, "-m")); 92 | if (m < 0) m = 7; 93 | } else m = 7; 94 | 95 | { 96 | World dw(argc, argv); 97 | diag_ctr(n, m, dw); 98 | } 99 | 100 | MPI_Finalize(); 101 | return 0; 102 | } 103 | /** 104 | * @} 105 | * @} 106 | */ 107 | 108 | #endif 109 | -------------------------------------------------------------------------------- /src/summation/sym_seq_sum.h: -------------------------------------------------------------------------------- 1 | #ifndef __INT_SYM_SEQ_SUM_H__ 2 | #define __INT_SYM_SEQ_SUM_H__ 3 | 4 | #include "summation.h" 5 | 6 | namespace CTF_int { 7 | /** 8 | * \brief performs symmetric contraction with unblocked reference kernel 9 | */ 10 | int sym_seq_sum_ref( char const * alpha, 11 | char const * A, 12 | algstrct const * sr_A, 13 | int order_A, 14 | int const * edge_len_A, 15 | int const * sym_A, 16 | int const * idx_map_A, 17 | char const * beta, 18 | char * B, 19 | algstrct const * sr_B, 20 | int order_B, 21 | int const * edge_len_B, 22 | int const * sym_B, 23 | int const * idx_map_B); 24 | 25 | /** 26 | * \brief performs symmetric summation with custom elementwise function 27 | */ 28 | int sym_seq_sum_cust(char const * alpha, 29 | char const * A, 30 | algstrct const * sr_A, 31 | int order_A, 32 | int const * edge_len_A, 33 | int const * sym_A, 34 | int const * idx_map_A, 35 | char const * beta, 36 | char * B, 37 | algstrct const * sr_B, 38 | int order_B, 39 | int const * edge_len_B, 40 | int const * sym_B, 41 | int const * idx_map_B, 42 
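
diag_ctr.cxx above exercises repeated index characters, which address a tensor diagonal: for the order-4 tensor `A` the string "aiai" selects entries whose first/third and second/fourth indices coincide. A sketch of that idiom as a standalone check (a condensed illustration of the behavior the test's pass message describes, not the test's exact code):

```cpp
#include <ctf.hpp>
using namespace CTF;

// Pull the A[a,i,a,i] diagonal out into an n-by-m matrix and compare it
// entrywise with mA; diag_ctr.cxx performs the summed variant of this check.
bool diagonal_matches(Tensor<> & A, Matrix<> & mA, int n, int m, World & dw){
  Matrix<> tr(n, m, NS, dw);
  tr["ai"]  = A["aiai"];   // repeated indices pick out the diagonal entries
  tr["ai"] -= mA["ai"];
  return tr.norm2() < 1.E-10;
}
```
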
| univar_function const * func); 43 | 44 | /** 45 | * \brief performs symmetric summation with blocked daxpy 46 | */ 47 | int sym_seq_sum_inr( char const * alpha, 48 | char const * A, 49 | algstrct const * sr_A, 50 | int order_A, 51 | int const * edge_len_A, 52 | int const * sym_A, 53 | int const * idx_map_A, 54 | char const * beta, 55 | char * B, 56 | algstrct const * sr_B, 57 | int order_B, 58 | int const * edge_len_B, 59 | int const * sym_B, 60 | int const * idx_map_B, 61 | int inr_stride); 62 | } 63 | 64 | #endif 65 | -------------------------------------------------------------------------------- /test/endomorphism.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup endomorphism endomorphism 6 | * @{ 7 | * \brief tests custom element-wise functions by implementing division elementwise on 4D tensors 8 | */ 9 | 10 | #include 11 | using namespace CTF; 12 | 13 | void fdbl(double & a){ 14 | a=a*a*a; 15 | } 16 | 17 | int endomorphism(int n, 18 | World & dw){ 19 | 20 | int shapeN4[] = {NS,NS,NS,NS}; 21 | int sizeN4[] = {n+1,n,n+2,n+3}; 22 | 23 | Tensor<> A(4, sizeN4, shapeN4, dw); 24 | 25 | A.fill_random(-.5, .5); 26 | 27 | 28 | double * all_start_data; 29 | int64_t nall; 30 | A.read_all(&nall, &all_start_data); 31 | 32 | double scale = 1.0; 33 | 34 | CTF::Transform endo([=](double & d){ d=scale*d*d*d; }); 35 | // below is equivalent to A.scale(1.0, "ijkl", endo); 36 | endo(A["ijkl"]); 37 | 38 | double * all_end_data; 39 | int64_t nall2; 40 | A.read_all(&nall2, &all_end_data); 41 | 42 | int pass = (nall == nall2); 43 | if (pass){ 44 | for (int64_t i=0; i=1.E-6) pass =0; 47 | } 48 | } 49 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 50 | 51 | if (dw.rank == 0){ 52 | if (pass){ 53 | printf("{ A[\"ijkl\"] = A[\"ijkl\"]^3 } passed\n"); 54 | } else { 55 | printf("{ A[\"ijkl\"] = A[\"ijkl\"]^3 } failed\n"); 56 | } 57 | } 58 | 59 | delete [] all_start_data; 60 | delete [] all_end_data; 61 | 62 | return pass; 63 | } 64 | 65 | 66 | #ifndef TEST_SUITE 67 | 68 | char* getCmdOption(char ** begin, 69 | char ** end, 70 | const std::string & option){ 71 | char ** itr = std::find(begin, end, option); 72 | if (itr != end && ++itr != end){ 73 | return *itr; 74 | } 75 | return 0; 76 | } 77 | 78 | 79 | int main(int argc, char ** argv){ 80 | int rank, np, n; 81 | int const in_num = argc; 82 | char ** input_str = argv; 83 | 84 | MPI_Init(&argc, &argv); 85 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 86 | MPI_Comm_size(MPI_COMM_WORLD, &np); 87 | 88 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 89 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 90 | if (n < 0) n = 5; 91 | } else n = 5; 92 | 93 | 94 | { 95 | World dw(MPI_COMM_WORLD, argc, argv); 96 | 97 | if (rank == 0){ 98 | printf("Computing endomorphism A_ijkl = f(A_ijkl)\n"); 99 | } 100 | endomorphism(n, dw); 101 | } 102 | 103 | 104 | MPI_Finalize(); 105 | return 0; 106 | } 107 | 108 | /** 109 | * @} 110 | * @} 111 | */ 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /src/shared/init_models.cxx: -------------------------------------------------------------------------------- 1 | namespace CTF_int{ 2 | double seq_tsr_spctr_cst_off_k0_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 3 | double seq_tsr_spctr_cst_off_k1_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 4 | double seq_tsr_spctr_cst_off_k2_init[] = 
{-2.1996E-04, 3.1883E-09, 3.8743E-11}; 5 | double seq_tsr_spctr_off_k0_init[] = {8.6970E-06, 4.5598E-11, 1.1544E-09}; 6 | double seq_tsr_spctr_off_k1_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 7 | double seq_tsr_spctr_off_k2_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 8 | double seq_tsr_spctr_cst_k0_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 9 | double seq_tsr_spctr_cst_k1_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 10 | double seq_tsr_spctr_cst_k2_init[] = {-8.8459E-08, 8.1207E-10, -2.8486E-12}; 11 | double seq_tsr_spctr_cst_k3_init[] = {1.8504E-08, 2.9154E-11, 2.1973E-11}; 12 | double seq_tsr_spctr_cst_k4_init[] = {2.0948E-05, 1.2294E-09, 8.0037E-10}; 13 | double seq_tsr_spctr_k0_init[] = {2.2620E-08, -5.7494E-10, 2.2146E-09}; 14 | double seq_tsr_spctr_k1_init[] = {5.3745E-06, 3.6464E-08, 2.2334E-10}; 15 | double seq_tsr_spctr_k2_init[] = {3.0917E-08, 5.2181E-11, 4.1634E-12}; 16 | double seq_tsr_spctr_k3_init[] = {7.2456E-08, 1.5128E-10, -1.5528E-12}; 17 | double seq_tsr_spctr_k4_init[] = {1.6880E-07, 4.9411E-10, 9.2847E-13}; 18 | double pin_keys_mdl_init[] = {3.1189E-09, 6.6717E-08}; 19 | double seq_tsr_ctr_mdl_cst_init[] = {5.1626E-06, -6.3215E-11, 3.9638E-09}; 20 | double seq_tsr_ctr_mdl_ref_init[] = {4.9138E-08, 5.8290E-10, 4.8575E-11}; 21 | double seq_tsr_ctr_mdl_inr_init[] = {2.0647E-08, 1.9721E-10, 2.9948E-11}; 22 | double seq_tsr_ctr_mdl_off_init[] = {6.2925E-05, 1.7449E-11, 1.7211E-12}; 23 | double seq_tsr_ctr_mdl_cst_inr_init[] = {1.3863E-04, 2.0119E-10, 9.8820E-09}; 24 | double seq_tsr_ctr_mdl_cst_off_init[] = {8.4844E-04, -5.9246E-11, 3.5247E-10}; 25 | double long_contig_transp_mdl_init[] = {2.9158E-10, 3.0501E-09}; 26 | double shrt_contig_transp_mdl_init[] = {1.3427E-08, 4.3168E-09}; 27 | double non_contig_transp_mdl_init[] = {4.0475E-08, 4.0463E-09}; 28 | double dgtog_res_mdl_init[] = {2.9786E-05, 2.4335E-04, 1.0845E-08}; 29 | double blres_mdl_init[] = {1.0598E-05, 7.2741E-08}; 30 | double alltoall_mdl_init[] = {1.0000E-06, 1.0000E-06, 5.0000E-10}; 31 | double alltoallv_mdl_init[] = {2.7437E-06, 2.2416E-05, 1.0469E-08}; 32 | double red_mdl_init[] = {6.2935E-07, 4.6276E-06, 9.2245E-10}; 33 | double red_mdl_cst_init[] = {5.7302E-07, 4.7347E-06, 6.0191E-10}; 34 | double allred_mdl_init[] = {8.4416E-07, 6.8651E-06, 3.5845E-08}; 35 | double allred_mdl_cst_init[] = {-3.3754E-04, 2.1343E-04, 3.0801E-09}; 36 | double bcast_mdl_init[] = {1.5045E-06, 1.4485E-05, 3.2876E-09}; 37 | double spredist_mdl_init[] = {1.2744E-04, 1.0278E-03, 7.6837E-08}; 38 | double csrred_mdl_init[] = {3.7005E-05, 1.1854E-04, 5.5165E-09}; 39 | double csrred_mdl_cst_init[] = {-1.8323E-04, 1.3076E-04, 2.8732E-09}; 40 | } 41 | -------------------------------------------------------------------------------- /src/interface/back_comp.h: -------------------------------------------------------------------------------- 1 | #ifndef __BACK_COMP_H__ 2 | #define __BACK_COMP_H__ 3 | 4 | /* pure double version of templated namespace CTF, 5 | 'using namespace CTF_double' cannot be used in combination in conjunction with 'using namespace CTF' */ 6 | namespace CTF_double { 7 | typedef CTF::World World; 8 | 9 | typedef CTF::Tensor<> Tensor; 10 | typedef CTF::Matrix<> Matrix; 11 | typedef CTF::Vector<> Vector; 12 | typedef CTF::Scalar<> Scalar; 13 | 14 | typedef CTF::Timer Timer; 15 | typedef CTF::Timer_epoch Timer_epoch; 16 | typedef CTF::Function_timer Function_timer; 17 | typedef CTF::Flop_counter Flop_counter; 18 | } 19 | 20 | //typdefs for backwards compatibility to CTF_VERSION 10x 21 | typedef CTF::World 
CTF_World; 22 | typedef CTF::World cCTF_World; 23 | template 24 | class tCTF_World : public CTF::World { 25 | public: 26 | tCTF_World(int argc, char * const * argv) : CTF::World(argc, argv){} 27 | tCTF_World(MPI_Comm comm = MPI_COMM_WORLD, 28 | int argc = 0, 29 | char * const * argv = NULL) : CTF::World(comm, argc, argv){} 30 | tCTF_World(int order, 31 | int const * lens, 32 | MPI_Comm comm = MPI_COMM_WORLD, 33 | int argc = 0, 34 | char * const * argv = NULL) : CTF::World(order, lens, comm, argc, argv){} 35 | 36 | }; 37 | 38 | typedef CTF::Tensor<> CTF_Tensor; 39 | typedef CTF::Matrix<> CTF_Matrix; 40 | typedef CTF::Vector<> CTF_Vector; 41 | typedef CTF::Scalar<> CTF_Scalar; 42 | typedef CTF::Idx_Tensor CTF_Idx_Tensor; 43 | typedef CTF::Tensor< std::complex > cCTF_Tensor; 44 | typedef CTF::Matrix< std::complex > cCTF_Matrix; 45 | typedef CTF::Vector< std::complex > cCTF_Vector; 46 | typedef CTF::Scalar< std::complex > cCTF_Scalar; 47 | typedef CTF::Idx_Tensor cCTF_Idx_Tensor; 48 | 49 | //this needs C++11, possible to do C++03 using struct 50 | template 51 | using tCTF_Tensor = CTF::Tensor; 52 | template 53 | using tCTF_Matrix = CTF::Matrix; 54 | template 55 | using tCTF_Vector = CTF::Vector; 56 | template 57 | using tCTF_Scalar = CTF::Scalar; 58 | template 59 | class tCTF_Idx_Tensor : CTF::Idx_Tensor { }; 60 | 61 | typedef CTF::Timer CTF_Timer; 62 | typedef CTF::Flop_counter CTF_Flop_Counter; 63 | typedef CTF::Timer_epoch CTF_Timer_epoch; 64 | 65 | typedef int64_t long_int; 66 | typedef int64_t key; 67 | 68 | template 69 | using tkv_pair = CTF::Pair; 70 | 71 | typedef tkv_pair kv_pair; 72 | typedef tkv_pair< std::complex > ckv_pair; 73 | 74 | 75 | //deprecated 76 | //enum CTF_OP { CTF_OP_SUM, CTF_OP_SUMABS, CTF_OP_SUMSQ, CTF_OP_MAX, CTF_OP_MIN, CTF_OP_MAXABS, CTF_OP_MINABS}; 77 | 78 | 79 | #endif 80 | -------------------------------------------------------------------------------- /test/univar_function.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup univar_function univar_function 6 | * @{ 7 | * \brief tests custom element-wise functions by implementing division elementwise on 4D tensors 8 | */ 9 | 10 | #include 11 | using namespace CTF; 12 | 13 | double fquad(double a){ 14 | return a*a*a*a; 15 | } 16 | 17 | int univar_function(int n, 18 | World & dw){ 19 | 20 | int shapeN4[] = {NS,NS,NS,NS}; 21 | int sizeN4[] = {n+1,n,n+2,n+3}; 22 | 23 | Tensor<> A(4, sizeN4, shapeN4, dw); 24 | 25 | srand48(dw.rank); 26 | A.fill_random(-.5, .5); 27 | 28 | 29 | double * all_start_data; 30 | int64_t nall; 31 | A.read_all(&nall, &all_start_data); 32 | 33 | 34 | //CTF::Function<> ufun(&fquad); 35 | CTF::Function<> ufun([](double a){ return a*a*a*a; }); 36 | // below is equivalent to A.scale(1.0, "ijkl", ufun); 37 | .5*A["ijkl"]+=ufun(.5*A["ijkl"]); 38 | 39 | double * all_end_data; 40 | int64_t nall2; 41 | A.read_all(&nall2, &all_end_data); 42 | 43 | int pass = (nall == nall2); 44 | if (pass){ 45 | for (int64_t i=0; i=1.E-6) pass =0; 47 | } 48 | } 49 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 50 | 51 | if (dw.rank == 0){ 52 | if (pass){ 53 | printf("{ A[\"ijkl\"] = A[\"ijkl\"]^3 } passed\n"); 54 | } else { 55 | printf("{ A[\"ijkl\"] = A[\"ijkl\"]^3 } failed\n"); 56 | } 57 | } 58 | 59 | delete [] all_start_data; 60 | delete [] all_end_data; 61 | 62 | return pass; 63 | } 64 | 65 | 66 | #ifndef TEST_SUITE 67 | 68 | char* 
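
The aliases in back_comp.h above are plain typedefs and alias templates over the current templated classes, so the CTF 1.0-era spellings and the templated ones denote the same types and can be mixed. A small sketch (illustrative only):

```cpp
#include <ctf.hpp>
using namespace CTF;

// Old-style names resolve to the templated classes, so a CTF_Matrix can be
// bound to a reference of the aliased base type and used interchangeably.
void legacy_names(int n, World & dw){
  CTF_Matrix M(n, n, NS, dw);     // same type as CTF::Matrix<>
  CTF_Vector v(n, dw);            // same type as CTF::Vector<>
  tCTF_Tensor<double> & T = M;    // alias template for CTF::Tensor<double>

  M["ij"] = 1.0;                  // fill all addressed entries with 1
  v["i"]  = T["ij"];              // j is summed away: row sums of M
}
```
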
getCmdOption(char ** begin, 69 | char ** end, 70 | const std::string & option){ 71 | char ** itr = std::find(begin, end, option); 72 | if (itr != end && ++itr != end){ 73 | return *itr; 74 | } 75 | return 0; 76 | } 77 | 78 | 79 | int main(int argc, char ** argv){ 80 | int rank, np, n; 81 | int const in_num = argc; 82 | char ** input_str = argv; 83 | 84 | MPI_Init(&argc, &argv); 85 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 86 | MPI_Comm_size(MPI_COMM_WORLD, &np); 87 | 88 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 89 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 90 | if (n < 0) n = 5; 91 | } else n = 5; 92 | 93 | 94 | { 95 | World dw(MPI_COMM_WORLD, argc, argv); 96 | 97 | if (rank == 0){ 98 | printf("Computing univar_function A_ijkl = f(A_ijkl)\n"); 99 | } 100 | univar_function(n, dw); 101 | } 102 | 103 | 104 | MPI_Finalize(); 105 | return 0; 106 | } 107 | 108 | /** 109 | * @} 110 | * @} 111 | */ 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /test/multi_tsr_sym.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | /** \addtogroup tests 3 | * @{ 4 | * \defgroupmulti_tsr_sym 5 | * @{ 6 | * \brief Matrix multiplication 7 | */ 8 | 9 | #include 10 | 11 | using namespace CTF; 12 | 13 | int multi_tsr_sym(int m, 14 | int n, 15 | World & dw){ 16 | int rank, i, num_pes; 17 | int64_t np; 18 | double * pairs; 19 | int64_t * indices; 20 | 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 22 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 23 | 24 | #ifndef TEST_SUITE 25 | if (rank == 0) 26 | printf("m = %d, n = %d, p = %d\n", 27 | m,n,num_pes); 28 | #endif 29 | 30 | //* Creates distributed tensors initialized with zeros 31 | Matrix<> A(n, m, NS, dw); 32 | Matrix<> C_NS(n, n, NS, dw); 33 | Matrix<> C_SY(n, n, SY, dw); 34 | Matrix<> diff(n, n, NS, dw); 35 | 36 | srand48(13*rank); 37 | //* Writes noise to local data based on global index 38 | A.get_local_data(&np, &indices, &pairs); 39 | for (i=0; i 11 | 12 | using namespace CTF; 13 | 14 | int diag_sym(int n, 15 | World & dw){ 16 | int rank, i, num_pes, pass; 17 | int64_t np; 18 | double * pairs; 19 | int64_t * indices; 20 | 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 22 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 23 | 24 | 25 | int shapeN4[] = {SY,NS,SY,NS}; 26 | int sizeN4[] = {n,n,n,n}; 27 | 28 | //* Creates distributed tensors initialized with zeros 29 | Tensor<> A(4, sizeN4, shapeN4, dw); 30 | Tensor<> B(4, sizeN4, shapeN4, dw); 31 | Tensor<> C(4, sizeN4, shapeN4, dw); 32 | 33 | srand48(13*rank); 34 | 35 | Matrix<> mA(n,n,NS,dw); 36 | Matrix<> mB(n,n,NS,dw); 37 | mA.get_local_data(&np, &indices, &pairs); 38 | for (i=0; i 11 | 12 | using namespace CTF; 13 | 14 | int repack(int n, 15 | World & dw){ 16 | int rank, i, num_pes, pass; 17 | int64_t np; 18 | double * pairs; 19 | int64_t * indices; 20 | 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 22 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 23 | 24 | 25 | int shapeN4[] = {NS,NS,NS,NS}; 26 | int shapeS4[] = {NS,NS,SY,NS}; 27 | int sizeN4[] = {n,n,n,n}; 28 | 29 | //* Creates distributed tensors initialized with zeros 30 | Tensor<> An(4, sizeN4, shapeN4, dw); 31 | Tensor<> As(4, sizeN4, shapeS4, dw); 32 | 33 | As.get_local_data(&np, &indices, &pairs); 34 | for (i=0; i Anr(An, shapeS4); 39 | 40 | Anr["ijkl"] -= As["ijkl"]; 41 | 42 | double norm = Anr.norm2(); 43 | 44 | if (norm < 1.E-6) 45 | pass = 1; 46 | else 47 | pass = 0; 48 | 49 | if 
(!pass) 50 | printf("{ NS -> SY repack } failed \n"); 51 | else { 52 | Tensor<> Anur(As, shapeN4); 53 | Tensor<> Asur(As, shapeN4); 54 | Asur["ijkl"] = 0.0; 55 | Asur.write(np, indices, pairs); 56 | Anur["ijkl"] -= Asur["ijkl"]; 57 | 58 | norm = Anur.norm2(); 59 | 60 | if (norm < 1.E-6){ 61 | pass = 1; 62 | if (rank == 0) 63 | printf("{ NS -> SY -> NS repack } passed \n"); 64 | } else { 65 | pass = 0; 66 | if (rank == 0) 67 | printf("{ SY -> NS repack } failed \n"); 68 | } 69 | 70 | } 71 | delete [] pairs; 72 | free(indices); 73 | return pass; 74 | } 75 | 76 | 77 | #ifndef TEST_SUITE 78 | char* getCmdOption(char ** begin, 79 | char ** end, 80 | const std::string & option){ 81 | char ** itr = std::find(begin, end, option); 82 | if (itr != end && ++itr != end){ 83 | return *itr; 84 | } 85 | return 0; 86 | } 87 | 88 | 89 | int main(int argc, char ** argv){ 90 | int rank, np, n; 91 | int in_num = argc; 92 | char ** input_str = argv; 93 | 94 | MPI_Init(&argc, &argv); 95 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 96 | MPI_Comm_size(MPI_COMM_WORLD, &np); 97 | 98 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 99 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 100 | if (n < 0) n = 7; 101 | } else n = 7; 102 | 103 | 104 | { 105 | World dw(argc, argv); 106 | repack(n, dw); 107 | } 108 | 109 | MPI_Finalize(); 110 | return 0; 111 | } 112 | /** 113 | * @} 114 | * @} 115 | */ 116 | 117 | #endif 118 | -------------------------------------------------------------------------------- /studies/fast_3mm.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #include 4 | 5 | using namespace CTF; 6 | 7 | int fast_diagram(int const n, 8 | World &ctf){ 9 | int rank, i, num_pes; 10 | 11 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 13 | 14 | 15 | Matrix<> T(n,n,NS,ctf); 16 | Matrix<> V(n,n,NS,ctf); 17 | Matrix<> Z_SY(n,n,SY,ctf); 18 | Matrix<> Z_AS(n,n,AS,ctf); 19 | Matrix<> Z_NS(n,n,NS,ctf); 20 | Vector<> Z_D(n,ctf); 21 | Matrix<> W(n,n,SH,ctf); 22 | Matrix<> W_ans(n,n,SH,ctf); 23 | 24 | int64_t * indices; 25 | double * values; 26 | int64_t size; 27 | srand48(173*rank); 28 | 29 | T.read_local(&size, &indices, &values); 30 | for (i=0; i 11 | 12 | using namespace CTF; 13 | 14 | int readall_test(int n, 15 | int m, 16 | World &dw){ 17 | int rank, i, num_pes, pass; 18 | 19 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 20 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 21 | 22 | 23 | int shapeN4[] = {NS,NS,NS,NS}; 24 | int sizeN4[] = {n,m,n,m}; 25 | 26 | //* Creates distributed tensors initialized with zeros 27 | Tensor<> A(4, sizeN4, shapeN4, dw); 28 | 29 | std::vector vals; 30 | std::vector inds; 31 | if (rank == 0){ 32 | World sw(MPI_COMM_SELF); 33 | 34 | Tensor<> sA(4, sizeN4, shapeN4, sw); 35 | 36 | 37 | if (rank == 0){ 38 | srand48(13*rank); 39 | for (i=0; i1.E-10) 63 | pass = 0; 64 | } 65 | } 66 | delete [] vs; 67 | 68 | if (pass){ 69 | if (rank == 0) 70 | printf("{ sum(ai)A[\"aiai\"]=sum(ai)mA[\"ai\"] } passed \n"); 71 | } else { 72 | if (rank == 0) 73 | printf("{ sum(ai)A[\"aiai\"]=sum(ai)mA[\"ai\"] } failed \n"); 74 | } 75 | 76 | 77 | return pass; 78 | } 79 | 80 | 81 | #ifndef TEST_SUITE 82 | char* getCmdOption(char ** begin, 83 | char ** end, 84 | const std::string & option){ 85 | char ** itr = std::find(begin, end, option); 86 | if (itr != end && ++itr != end){ 87 | return *itr; 88 | } 89 | return 0; 90 | } 91 | 92 | 93 | int main(int argc, char ** argv){ 94 
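
repack.cxx above relies on the `Tensor` copy constructor that takes a replacement symmetry array, which repacks the same data into the new packed layout. A condensed sketch of the SY-to-NS direction, which can be verified without knowing how NS-to-SY folding treats unsymmetric data (illustrative, not a file from the repository):

```cpp
#include <ctf.hpp>
using namespace CTF;

// Unpack an order-4 tensor that is SY-packed in its last index pair into a
// plain NS tensor, then confirm the unpacked data really is symmetric in (k,l).
bool unpack_is_symmetric(int n, World & dw){
  int lens[4]     = {n,n,n,n};
  int shape_sy[4] = {NS,NS,SY,NS};   // SY in slot 2 pairs index k with index l
  int shape_ns[4] = {NS,NS,NS,NS};

  Tensor<> As(4, lens, shape_sy, dw);
  As.fill_random(0., 1.);

  Tensor<> An(As, shape_ns);         // repack SY -> NS, as in repack.cxx

  Tensor<> D(4, lens, shape_ns, dw);
  D["ijkl"]  = An["ijkl"];
  D["ijkl"] -= An["ijlk"];           // symmetric data => the difference vanishes
  return D.norm2() < 1.E-6;
}
```
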
| int rank, np, n, m; 95 | int in_num = argc; 96 | char ** input_str = argv; 97 | 98 | MPI_Init(&argc, &argv); 99 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 100 | MPI_Comm_size(MPI_COMM_WORLD, &np); 101 | 102 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 103 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 104 | if (n < 0) n = 7; 105 | } else n = 7; 106 | 107 | if (getCmdOption(input_str, input_str+in_num, "-m")){ 108 | m = atoi(getCmdOption(input_str, input_str+in_num, "-m")); 109 | if (m < 0) m = 9; 110 | } else m = 9; 111 | 112 | 113 | 114 | { 115 | World dw(argc, argv); 116 | readall_test(n, m, dw); 117 | } 118 | 119 | MPI_Finalize(); 120 | return 0; 121 | } 122 | #endif 123 | /** 124 | * @} 125 | * @} 126 | */ 127 | 128 | -------------------------------------------------------------------------------- /src/interface/ring.h: -------------------------------------------------------------------------------- 1 | #ifndef __RING_H__ 2 | #define __RING_H__ 3 | 4 | #include "../tensor/algstrct.h" 5 | 6 | namespace CTF { 7 | 8 | /** 9 | * \addtogroup algstrct 10 | * @{ 11 | */ 12 | /** 13 | * \brief Ring class defined by a datatype and addition and multiplicaton functions 14 | * addition must have an identity, inverse, and be associative, does not need to be commutative 15 | * multiplications must have an identity and be distributive 16 | */ 17 | template ()> 18 | class Ring : public Semiring { 19 | public: 20 | Ring(Ring const & other) : Semiring(other) { 21 | this->abs = &CTF_int::char_abs< dtype, CTF_int::default_abs >; 22 | } 23 | /** 24 | * \brief default constructor valid for only certain types: 25 | * bool, int, unsigned int, int64_t, uint64_t, 26 | * float, double, std::complex, std::complex 27 | */ 28 | Ring() : Semiring() { 29 | this->abs = &CTF_int::char_abs< dtype, CTF_int::default_abs >; 30 | } 31 | 32 | virtual CTF_int::algstrct * clone() const { 33 | return new Ring(*this); 34 | } 35 | 36 | /** 37 | * \brief constructor for algstrct equipped with * and + 38 | * \param[in] addid_ additive identity 39 | * \param[in] fadd_ binary addition function 40 | * \param[in] addmop_ MPI_Op operation for addition 41 | * \param[in] mulid_ multiplicative identity 42 | * \param[in] fmul_ binary multiplication function 43 | * \param[in] gemm_ block matrix multiplication function 44 | * \param[in] axpy_ vector sum function 45 | * \param[in] scal_ vector scale function 46 | */ 47 | Ring(dtype addid_, 48 | dtype (*fadd_)(dtype a, dtype b), 49 | MPI_Op addmop_, 50 | dtype mulid_, 51 | dtype (*fmul_)(dtype a, dtype b), 52 | void (*gemm_)(char,char,int,int,int,dtype,dtype const*,dtype const*,dtype,dtype*)=NULL, 53 | void (*axpy_)(int,dtype,dtype const*,int,dtype*,int)=NULL, 54 | void (*scal_)(int,dtype,dtype*,int)=NULL) 55 | : Semiring(addid_, fadd_, mulid_, addmop_, fmul_, gemm_, axpy_, scal_) { 56 | this->abs = &CTF_int::char_abs< dtype, CTF_int::default_abs >; 57 | } 58 | 59 | //treat NULL as mulid 60 | void safeaddinv(char const * a, char *& b) const { 61 | if (b==NULL) b = (char*)malloc(this->el_size); 62 | if (a == NULL){ 63 | 64 | ((dtype*)b)[0] = -this->tmulid; 65 | } else { 66 | ((dtype*)b)[0] = -((dtype*)a)[0]; 67 | } 68 | } 69 | 70 | void addinv(char const * a, char * b) const { 71 | ((dtype*)b)[0] = -((dtype*)a)[0]; 72 | } 73 | 74 | }; 75 | /** 76 | * @} 77 | */ 78 | 79 | } 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /test/speye.cxx: -------------------------------------------------------------------------------- 1 
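
ring.h above exposes an explicit constructor taking the additive identity, addition function, MPI reduction op, multiplicative identity and multiplication function. A sketch of defining a ring over `int` and using it for tensors, following that constructor signature and the algstrct-taking Matrix/Vector constructors used elsewhere in this dump (illustrative; assumes n >= 2 for the example indices):

```cpp
#include <ctf.hpp>
using namespace CTF;

int iadd(int a, int b){ return a + b; }
int imul(int a, int b){ return a * b; }

// Integer arithmetic spelled out through the explicit Ring constructor; tensors
// built over this structure use it for all of their summations and contractions.
void custom_ring_demo(int n, World & dw){
  Ring<int> Z(0, &iadd, MPI_SUM, 1, &imul);

  Matrix<int> M(n, n, NS, dw, Z);
  Vector<int> v(n, dw, Z);
  Vector<int> w(n, dw, Z);

  if (dw.rank == 0){
    int64_t iM[2] = {0, (int64_t)n + 1};  // two global element indices
    int     vM[2] = {3, 4};
    M.write(2, iM, vM);
    int64_t iv[1] = {0};
    int     vv[1] = {5};
    v.write(1, iv, vv);
  } else {
    M.write(0, NULL, NULL);               // write is collective: all ranks call it
    v.write(0, NULL, NULL);
  }

  w["i"] += M["ij"] * v["j"];             // integer matvec over the custom ring
}
```
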
| /** \addtogroup tests 2 | * @{ 3 | * \defgroup speye speye 4 | * @{ 5 | * \brief Sparse identity matrix test 6 | */ 7 | 8 | #include 9 | using namespace CTF; 10 | 11 | int speye(int n, 12 | int order, 13 | World & dw){ 14 | 15 | int shape[order]; 16 | int size[order]; 17 | char idx_rep[order+1]; 18 | idx_rep[order]='\0'; 19 | char idx_chg[order+1]; 20 | idx_chg[order]='\0'; 21 | for (int i=0; i A(order, true, size, shape, dw); 33 | 34 | A[idx_rep] = 1.0; 35 | 36 | /* if (order == 3){ 37 | int ns[] = {n,n,n}; 38 | int sy[] = {SY,SY,NS}; 39 | Tensor<> AA(3, ns, sy, dw); 40 | AA.fill_random(0.0,1.0); 41 | A["ijk"] += AA["ijk"]; 42 | AA["ijk"] += A["ijk"]; 43 | AA["ijk"] += A["ijk"]; 44 | }*/ 45 | 46 | /*if (dw.rank == 0) 47 | printf("PRINTING\n"); 48 | A.print();*/ 49 | 50 | double sum1 = A[idx_chg]; 51 | double sum2 = A[idx_rep]; 52 | 53 | int pass = (fabs(sum1-n)<1.E-9) & (fabs(sum2-n)<1.E-9); 54 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 55 | if (dw.rank == 0){ 56 | if (pass) 57 | printf("{ A is sparse; A[\"iii...\"]=1; sum(A) = range of i } passed \n"); 58 | else 59 | printf("{ A is sparse; A[\"iii...\"]=1; sum(A) = range of i } failed \n"); 60 | } 61 | return pass; 62 | } 63 | 64 | 65 | #ifndef TEST_SUITE 66 | char* getCmdOption(char ** begin, 67 | char ** end, 68 | const std::string & option){ 69 | char ** itr = std::find(begin, end, option); 70 | if (itr != end && ++itr != end){ 71 | return *itr; 72 | } 73 | return 0; 74 | } 75 | 76 | 77 | int main(int argc, char ** argv){ 78 | int rank, np, n, pass, order; 79 | int const in_num = argc; 80 | char ** input_str = argv; 81 | 82 | MPI_Init(&argc, &argv); 83 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 84 | MPI_Comm_size(MPI_COMM_WORLD, &np); 85 | 86 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 87 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 88 | if (n < 0) n = 7; 89 | } else n = 7; 90 | 91 | if (getCmdOption(input_str, input_str+in_num, "-order")){ 92 | order = atoi(getCmdOption(input_str, input_str+in_num, "-order")); 93 | if (order < 0) order = 3; 94 | } else order = 3; 95 | 96 | { 97 | World dw(argc, argv); 98 | 99 | if (rank == 0){ 100 | printf("Computing sum of I where I is an identity tensor of order %d and dimension %d stored sparse\n", order, n); 101 | } 102 | pass = speye(n, order, dw); 103 | assert(pass); 104 | } 105 | 106 | MPI_Finalize(); 107 | return 0; 108 | } 109 | /** 110 | * @} 111 | * @} 112 | */ 113 | 114 | #endif 115 | -------------------------------------------------------------------------------- /test/bivar_function.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup bivar_function bivar_function 6 | * @{ 7 | * \brief tests custom element-wise functions by implementing division elementwise on 4D tensors 8 | */ 9 | 10 | #include 11 | using namespace CTF; 12 | 13 | double f2(double a, double b){ 14 | return a*b+b*a; 15 | } 16 | 17 | int bivar_function(int n, 18 | World & dw){ 19 | 20 | int shapeN4[] = {NS,NS,NS,NS}; 21 | int sizeN4[] = {n+1,n,n+2,n+3}; 22 | 23 | Tensor<> A(4, sizeN4, shapeN4, dw); 24 | Tensor<> B(4, sizeN4, shapeN4, dw); 25 | 26 | srand48(dw.rank); 27 | A.fill_random(-.5, .5); 28 | B.fill_random(-.5, .5); 29 | 30 | 31 | double * all_start_data_A; 32 | int64_t nall_A; 33 | A.read_all(&nall_A, &all_start_data_A); 34 | double * all_start_data_B; 35 | int64_t nall_B; 36 | B.read_all(&nall_B, 
&all_start_data_B); 37 | 38 | CTF::Function<> bfun([](double a, double b){ return a*b + b*a; }); 39 | .5*A["ijkl"]+=bfun(A["ijkl"],B["ijkl"]); 40 | 41 | double * all_end_data_A; 42 | int64_t nall2_A; 43 | A.read_all(&nall2_A, &all_end_data_A); 44 | 45 | int pass = (nall_A == nall2_A); 46 | if (pass){ 47 | for (int64_t i=0; i=1.E-6) pass =0; 49 | } 50 | } 51 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 52 | 53 | if (dw.rank == 0){ 54 | if (pass){ 55 | printf("{ A[\"ijkl\"] = f2(A[\"ijkl\"], B[\"ijkl\"]) } passed\n"); 56 | } else { 57 | printf("{ A[\"ijkl\"] = f2(A[\"ijkl\"], B[\"ijkl\"]) } failed\n"); 58 | } 59 | } 60 | 61 | delete [] all_start_data_A; 62 | delete [] all_end_data_A; 63 | delete [] all_start_data_B; 64 | 65 | return pass; 66 | } 67 | 68 | 69 | #ifndef TEST_SUITE 70 | 71 | char* getCmdOption(char ** begin, 72 | char ** end, 73 | const std::string & option){ 74 | char ** itr = std::find(begin, end, option); 75 | if (itr != end && ++itr != end){ 76 | return *itr; 77 | } 78 | return 0; 79 | } 80 | 81 | 82 | int main(int argc, char ** argv){ 83 | int rank, np, n; 84 | int const in_num = argc; 85 | char ** input_str = argv; 86 | 87 | MPI_Init(&argc, &argv); 88 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 89 | MPI_Comm_size(MPI_COMM_WORLD, &np); 90 | 91 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 92 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 93 | if (n < 0) n = 5; 94 | } else n = 5; 95 | 96 | 97 | { 98 | World dw(MPI_COMM_WORLD, argc, argv); 99 | 100 | if (rank == 0){ 101 | printf("Computing bivar_function A_ijkl = f(B_ijkl, A_ijkl)\n"); 102 | } 103 | bivar_function(n, dw); 104 | } 105 | 106 | 107 | MPI_Finalize(); 108 | return 0; 109 | } 110 | 111 | /** 112 | * @} 113 | * @} 114 | */ 115 | 116 | #endif 117 | -------------------------------------------------------------------------------- /src/interface/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef __TIMER_H__ 2 | #define __TIMER_H__ 3 | 4 | #include "common.h" 5 | 6 | 7 | namespace CTF { 8 | /** 9 | * \defgroup timer Timing and cost measurement 10 | * \addtogroup timer 11 | * @{ 12 | */ 13 | 14 | #define MAX_NAME_LENGTH 53 15 | 16 | /** 17 | * \brief times a specific symbol 18 | */ 19 | class Function_timer{ 20 | public: 21 | char name[MAX_NAME_LENGTH]; 22 | double start_time; 23 | double start_excl_time; 24 | double acc_time; 25 | double acc_excl_time; 26 | int calls; 27 | 28 | double total_time; 29 | double total_excl_time; 30 | int total_calls; 31 | 32 | public: 33 | Function_timer(char const * name_, 34 | double const start_time_, 35 | double const start_excl_time_); 36 | //Function_timer(Function_timer const & other); 37 | //~Function_timer(); 38 | void compute_totals(MPI_Comm comm); 39 | bool operator<(Function_timer const & w) const ; 40 | void print(FILE * output, 41 | MPI_Comm const comm, 42 | int const rank, 43 | int const np); 44 | }; 45 | 46 | 47 | /** 48 | * \brief local process walltime measurement 49 | */ 50 | class Timer{ 51 | public: 52 | char const * timer_name; 53 | int index; 54 | int exited; 55 | int original; 56 | 57 | public: 58 | Timer(char const * name); 59 | ~Timer(); 60 | void stop(); 61 | void start(); 62 | void exit(); 63 | 64 | }; 65 | 66 | /** 67 | * \brief epoch during which to measure timers 68 | */ 69 | class Timer_epoch{ 70 | private: 71 | Timer * tmr_inner; 72 | Timer * tmr_outer; 73 | double save_excl_time; 74 | std::vector saved_function_timers; 75 | public: 76 | char const * name; 77 
| //create epoch called name 78 | Timer_epoch(char const * name_); 79 | 80 | ~Timer_epoch(){ 81 | saved_function_timers.clear(); 82 | } 83 | 84 | //clears timers and begins epoch 85 | void begin(); 86 | 87 | //prints timers and clears them 88 | void end(); 89 | }; 90 | 91 | 92 | /** 93 | * \brief a term is an abstract object representing some expression of tensors 94 | */ 95 | 96 | /** 97 | * \brief measures flops done in a code region 98 | */ 99 | class Flop_counter{ 100 | public: 101 | int64_t start_count; 102 | 103 | public: 104 | /** 105 | * \brief constructor, starts counter 106 | */ 107 | Flop_counter(); 108 | ~Flop_counter(); 109 | 110 | /** 111 | * \brief restarts counter 112 | */ 113 | void zero(); 114 | 115 | /** 116 | * \brief get total flop count over all counters in comm 117 | */ 118 | int64_t count(MPI_Comm comm = MPI_COMM_SELF); 119 | 120 | }; 121 | 122 | /** 123 | * @} 124 | */ 125 | } 126 | 127 | 128 | #endif 129 | 130 | -------------------------------------------------------------------------------- /examples/checkpoint.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2016, Edgar Solomonik, all rights reserved.*/ 2 | /** \addtogroup examples 3 | * @{ 4 | * \defgroup checkpoint checkpoint 5 | * @{ 6 | * \brief tests read and write dense data to file functionality 7 | */ 8 | 9 | #include 10 | using namespace CTF; 11 | 12 | int checkpoint(int n, 13 | World & dw, 14 | int qtf=NS){ 15 | 16 | Matrix<> A(n, n, qtf, dw); 17 | Matrix<> A2(n, n, qtf, dw); 18 | Matrix<> A3(n, n, qtf, dw); 19 | Matrix<> A4(n, n, qtf, dw); 20 | Matrix<> A5(n, n, qtf, dw); 21 | 22 | srand48(13*dw.rank); 23 | A.fill_random(0.0,1.0); 24 | A.print(); 25 | A["ii"] = 0.0; 26 | A2["ij"] = A["ij"]; 27 | A3["ij"] = 2.*A["ij"]; 28 | 29 | MPI_File file; 30 | MPI_File_open(dw.comm, "CTF_checkpoint_test_file.bin", MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &file); 31 | A2.write_dense_to_file(file); 32 | A3.write_dense_to_file(file,n*n*sizeof(double)); 33 | MPI_File_close(&file); 34 | 35 | MPI_File_open(dw.comm, "CTF_checkpoint_test_file.bin", MPI_MODE_RDONLY | MPI_MODE_DELETE_ON_CLOSE, MPI_INFO_NULL, &file); 36 | A4.read_dense_from_file(file); 37 | 38 | A4.print(); 39 | A["ij"] -= A4["ij"]; 40 | int pass = A.norm2() <= 1.e-9*n; 41 | 42 | A5.read_dense_from_file(file,n*n*sizeof(double)); 43 | MPI_File_close(&file); 44 | A5["ij"] -= 2.*A4["ij"]; 45 | pass = pass & (A5.norm2() <= 1.e-9*n); 46 | 47 | if (dw.rank == 0){ 48 | if (!pass){ 49 | printf("{ checkpointing using dense data representation with qtf=%d } failed\n",qtf); 50 | } else { 51 | printf("{ checkpointing using dense data representation with qtf=%d } passed\n",qtf); 52 | } 53 | } 54 | return pass; 55 | 56 | } 57 | 58 | 59 | #ifndef TEST_SUITE 60 | char* getCmdOption(char ** begin, 61 | char ** end, 62 | const std::string & option){ 63 | char ** itr = std::find(begin, end, option); 64 | if (itr != end && ++itr != end){ 65 | return *itr; 66 | } 67 | return 0; 68 | } 69 | 70 | 71 | int main(int argc, char ** argv){ 72 | int rank, np, n, qtf; 73 | int const in_num = argc; 74 | char ** input_str = argv; 75 | 76 | MPI_Init(&argc, &argv); 77 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 78 | MPI_Comm_size(MPI_COMM_WORLD, &np); 79 | 80 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 81 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 82 | if (n < 0) n = 7; 83 | } else n = 7; 84 | 85 | if (getCmdOption(input_str, input_str+in_num, "-qtf")){ 86 | qtf = atoi(getCmdOption(input_str, 
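
timer.h above declares the profiling interface: a named `Timer` with explicit `start()`/`stop()`, a `Timer_epoch` that collects and prints every timer between `begin()` and `end()`, and a `Flop_counter` whose `count()` reduces the flop total over a communicator. A usage sketch combining the three around a single contraction (illustrative, not a file from the repository):

```cpp
#include <ctf.hpp>
#include <cstdio>
using namespace CTF;

void timed_contraction(int n, World & dw){
  Matrix<> A(n, n, dw), B(n, n, dw), C(n, n, dw);
  A.fill_random(0., 1.);
  B.fill_random(0., 1.);

  Timer_epoch te("demo_epoch");
  te.begin();                          // clears timers and starts the epoch

  Flop_counter fc;                     // starts counting at construction
  Timer t("demo_gemm");
  t.start();
  C["ij"] += A["ik"] * B["kj"];
  t.stop();

  int64_t flops = fc.count(dw.comm);   // flop total over all ranks of dw
  if (dw.rank == 0) printf("contraction flops: %lld\n", (long long)flops);

  te.end();                            // prints the timers gathered in this epoch
}
```
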
input_str+in_num, "-qtf")); 87 | if (qtf < 0) qtf = NS; 88 | } else qtf = NS; 89 | 90 | 91 | 92 | { 93 | World dw(MPI_COMM_WORLD, argc, argv); 94 | if (rank == 0){ 95 | printf("Checking checkpoint calculation n = %d, p = %d, qtf = %d:\n",n,np,qtf); 96 | } 97 | int pass = checkpoint(n,dw,qtf); 98 | assert(pass); 99 | } 100 | 101 | MPI_Finalize(); 102 | return 0; 103 | } 104 | /** 105 | * @} 106 | * @} 107 | */ 108 | 109 | #endif 110 | -------------------------------------------------------------------------------- /src/contraction/ctr_offload.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #ifndef __CTR_OFFLOAD_H__ 4 | #define __CTR_OFFLOAD_H__ 5 | 6 | #include "../shared/offload.h" 7 | #include "ctr_comm.h" 8 | 9 | namespace CTF_int { 10 | #ifdef OFFLOAD 11 | class ctr_offload : public ctr { 12 | public: 13 | /* Class to be called on sub-blocks */ 14 | ctr * rec_ctr; 15 | int64_t size_A; 16 | int64_t size_B; 17 | int64_t size_C; 18 | int iter_counter; 19 | int total_iter; 20 | int upload_phase_A; 21 | int upload_phase_B; 22 | int download_phase_C; 23 | offload_tsr * ptr_A; 24 | offload_tsr * ptr_B; 25 | offload_tsr * ptr_C; 26 | 27 | /** 28 | * \brief print ctr object 29 | */ 30 | void print(); 31 | 32 | /** 33 | * \brief offloads and downloads local blocks of dense tensors 34 | */ 35 | void run(char * A, char * B, char * C); 36 | 37 | /** 38 | * \brief returns the number of bytes of buffer space 39 | we need 40 | * \return bytes needed 41 | */ 42 | int64_t mem_fp(); 43 | 44 | /** 45 | * \brief returns the number of bytes of buffer space we need recursively 46 | * \return bytes needed for recursive contraction 47 | */ 48 | int64_t mem_rec(); 49 | 50 | /** 51 | * \brief returns the time this kernel will take excluding calls to rec_ctr 52 | * \return seconds needed 53 | */ 54 | double est_time_fp(int nlyr); 55 | 56 | 57 | /** 58 | * \brief returns the time this kernel will take including calls to rec_ctr 59 | * \return seconds needed for recursive contraction 60 | */ 61 | double est_time_rec(int nlyr); 62 | 63 | /** 64 | * \brief copies ctr object 65 | */ 66 | ctr * clone(); 67 | 68 | /** 69 | * \brief copies ctr object 70 | */ 71 | ctr_offload(ctr * other); 72 | 73 | /** 74 | * \brief deallocates ctr_offload object 75 | */ 76 | ~ctr_offload(); 77 | 78 | /** 79 | * \brief allocates ctr_offload object 80 | * \param[in] c contraction object 81 | * \param[in] size_A size of the A tensor 82 | * \param[in] size_B size of the B tensor 83 | * \param[in] size_C size of the C tensor 84 | * \param[in] total_iter number of gemms to be done 85 | * \param[in] upload_phase_A period in iterations with which to upload A 86 | * \param[in] upload_phase_B period in iterations with which to upload B 87 | * \param[in] download_phase_C period in iterations with which to download C 88 | */ 89 | ctr_offload(contraction const * c, 90 | int64_t size_A, 91 | int64_t size_B, 92 | int64_t size_C, 93 | int total_iter, 94 | int upload_phase_A, 95 | int upload_phase_B, 96 | int download_phase_C); 97 | 98 | }; 99 | #endif 100 | 101 | } 102 | 103 | #endif 104 | -------------------------------------------------------------------------------- /test/sy_times_ns.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup sy_times_ns sy_times_ns 6 | * @{ 7 | * \brief Tests 
contraction of a symmetric index group with a nonsymmetric one 8 | */ 9 | 10 | #include 11 | 12 | using namespace CTF; 13 | 14 | int sy_times_ns(int n, 15 | World & dw){ 16 | int rank, i, num_pes, pass; 17 | int64_t np; 18 | double * pairs; 19 | int64_t * indices; 20 | 21 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 22 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 23 | 24 | 25 | int shapeN4[] = {NS,NS,NS,NS}; 26 | int sizeN4[] = {n,n,n,n}; 27 | 28 | //* Creates distributed tensors initialized with zeros 29 | Tensor<> B(4, sizeN4, shapeN4, dw); 30 | 31 | Matrix<> A(n, n, SY, dw); 32 | Matrix<> An(n, n, NS, dw); 33 | Matrix<> C(n, n, SY, dw, "C"); 34 | Matrix<> Cn(n, n, NS, dw, "Cn"); 35 | 36 | srand48(13*rank); 37 | 38 | 39 | A.get_local_data(&np, &indices, &pairs); 40 | for (i=0; i 11 | using namespace CTF; 12 | 13 | struct cust_sp_type { 14 | char name[256]; 15 | int len_name; 16 | }; 17 | 18 | void comp_len(cust_sp_type & a){ 19 | a.len_name = strlen(a.name); 20 | } 21 | 22 | int endomorphism_cust_sp(int n, 23 | World & dw){ 24 | 25 | int shapeN4[] = {NS,NS,NS,NS}; 26 | int sizeN4[] = {n+1,n,n+2,n+3}; 27 | 28 | Set s = Set(); 29 | 30 | Tensor A(4, true, sizeN4, shapeN4, dw, s); 31 | 32 | if (dw.rank < n*n*n*n){ 33 | srand48(dw.rank); 34 | int str_len = drand48()*255; 35 | 36 | cust_sp_type my_obj; 37 | std::fill(my_obj.name, my_obj.name+str_len, 'a'); 38 | my_obj.name[str_len]='\0'; 39 | 40 | int64_t idx = dw.rank; 41 | A.write(1, &idx, &my_obj); 42 | } else 43 | A.write(0, NULL, NULL); 44 | 45 | CTF::Transform endo(comp_len); 46 | // below is equivalent to A.scale(NULL, "ijkl", endo); 47 | endo(A["ijkl"]); 48 | 49 | int64_t * indices; 50 | cust_sp_type * loc_data; 51 | int64_t nloc; 52 | A.get_local_data(&nloc, &indices, &loc_data, true); 53 | 54 | int pass = 1; 55 | if (pass){ 56 | for (int64_t i=0; i 14 | class Vector : public Tensor { 15 | public: 16 | int len; 17 | /** 18 | * \brief default constructor for a vector 19 | */ 20 | Vector(); 21 | 22 | /** 23 | * \brief copy constructor for a matrix 24 | * \param[in] A matrix to copy along with its data 25 | */ 26 | Vector(Vector const & A); 27 | 28 | /** 29 | * \brief casts a tensor to a matrix 30 | * \param[in] A tensor object of order 1 31 | */ 32 | Vector(Tensor const & A); 33 | 34 | /** 35 | * \brief constructor for a vector 36 | * \param[in] len dimension of vector 37 | * \param[in] world CTF world where the tensor will live 38 | * \param[in] sr defines the tensor arithmetic for this tensor 39 | */ 40 | Vector(int len, 41 | World & world, 42 | CTF_int::algstrct const & sr); 43 | 44 | /** 45 | * \brief constructor for a vector 46 | * \param[in] len dimension of vector 47 | * \param[in] world CTF world where the tensor will live 48 | * \param[in] name an optionary name for the tensor 49 | * \param[in] profile set to 1 to profile contractions involving this tensor 50 | * \param[in] sr defines the tensor arithmetic for this tensor 51 | */ 52 | Vector(int len, 53 | World & world=get_universe(), 54 | char const * name=NULL, 55 | int profile=0, 56 | CTF_int::algstrct const & sr=Ring()); 57 | 58 | /** 59 | * \brief constructor for a vector 60 | * \param[in] len dimension of vector 61 | * \param[in] atr quantifier for sparsity and symmetry of matrix (0 -> dense, >0 -> sparse) 62 | * \param[in] world CTF world where the tensor will live 63 | * \param[in] sr defines the tensor arithmetic for this tensor 64 | */ 65 | Vector(int len, 66 | int atr, 67 | World & world=get_universe(), 68 | CTF_int::algstrct const & sr=Ring()); 69 | 70 | 71 | /** 72 | * 
\brief constructor for a vector 73 | * \param[in] len dimension of vector 74 | * \param[in] atr quantifier for sparsity and symmetry of matrix (0 -> dense, >0 -> sparse) 75 | * \param[in] world CTF world where the tensor will live 76 | */ 77 | Vector(int len, 78 | int atr, 79 | World & world, 80 | char const * name, 81 | int profile=0, 82 | CTF_int::algstrct const & sr=Ring()); 83 | 84 | 85 | 86 | //Vector & operator=(const Vector & A); 87 | /** 88 | * @} 89 | */ 90 | }; 91 | } 92 | #include "vector.cxx" 93 | #endif 94 | -------------------------------------------------------------------------------- /src/interface/sparse_tensor.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #include "common.h" 4 | 5 | namespace CTF { 6 | 7 | template 8 | Sparse_Tensor::Sparse_Tensor(){ 9 | parent = NULL; 10 | } 11 | 12 | template 13 | Sparse_Tensor::Sparse_Tensor(std::vector indices_, 14 | Tensor * parent_){ 15 | parent = parent_; 16 | indices = indices_; 17 | scale = *(dtype*)parent_->sr->mulid(); 18 | } 19 | 20 | template 21 | Sparse_Tensor::Sparse_Tensor(int64_t n, 22 | int64_t * indices_, 23 | Tensor * parent_){ 24 | parent = parent_; 25 | indices = std::vector(indices_,indices_+n); 26 | scale = *(dtype*)parent_->sr->mulid(); 27 | } 28 | 29 | template 30 | void Sparse_Tensor::write(dtype alpha, 31 | dtype * values, 32 | dtype beta){ 33 | parent->write(indices.size(),alpha,beta,&indices[0],&values[0]); 34 | } 35 | 36 | // C++ overload special-cases of above method 37 | template 38 | void Sparse_Tensor::operator=(std::vector values){ 39 | write(*(dtype const*)parent->sr->mulid(), &values[0], *(dtype const*)parent->sr->addid()); 40 | } 41 | template 42 | void Sparse_Tensor::operator=(dtype* values){ 43 | write(*(dtype const*)parent->sr->mulid(), values, *(dtype const*)parent->sr->addid()); 44 | } 45 | 46 | template 47 | void Sparse_Tensor::operator+=(std::vector values){ 48 | write(*(dtype const*)parent->sr->mulid(), &values[0], *(dtype const*)parent->sr->mulid()); 49 | } 50 | 51 | template 52 | void Sparse_Tensor::operator+=(dtype* values){ 53 | write(*(dtype const*)parent->sr->mulid(), values, *(dtype const*)parent->sr->mulid()); 54 | } 55 | 56 | template 57 | void Sparse_Tensor::operator-=(std::vector values){ 58 | write(-*(dtype const*)parent->sr->mulid(), &values[0], *(dtype const*)parent->sr->mulid()); 59 | } 60 | 61 | template 62 | void Sparse_Tensor::operator-=(dtype* values){ 63 | write(-*(dtype const*)parent->sr->mulid(), values, *(dtype const*)parent->sr->mulid()); 64 | } 65 | 66 | template 67 | void Sparse_Tensor::read(dtype alpha, 68 | dtype * values, 69 | dtype beta){ 70 | parent->read(indices.size(),alpha,beta,&indices[0],values); 71 | } 72 | template 73 | Sparse_Tensor::operator std::vector(){ 74 | std::vector values(indices.size()); 75 | read(parent->sr->mulid(), &values[0], parent->sr->addid()); 76 | return values; 77 | } 78 | 79 | template 80 | Sparse_Tensor::operator dtype*(){ 81 | dtype * values = (dtype*)malloc(sizeof(dtype)*indices.size()); 82 | read(parent->sr->mulid(), values, parent->sr->addid()); 83 | return values; 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /examples/particle_interaction.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup examples 4 | * @{ 5 | * \defgroup 
particle_interaction particle_interaction 6 | * @{ 7 | * \brief tests custom element-wise functions by computing interactions between particles and integrating 8 | */ 9 | 10 | #include <ctf.hpp> 11 | #include "moldynamics.h" 12 | using namespace CTF; 13 | int particle_interaction(int n, 14 | World & dw){ 15 | 16 | Set<particle> sP = Set<particle>(); 17 | Group<force> gF = Group<force>(); 18 | 19 | Vector<particle> P(n, dw, sP); 20 | 21 | particle * loc_parts; 22 | int64_t nloc; 23 | int64_t * inds; 24 | P.get_local_data(&nloc, &inds, &loc_parts); 25 | 26 | srand48(dw.rank); 27 | 28 | for (int64_t i=0; i F(n, dw, gF); 39 | 40 | // CTF::Bivar_Function<particle,particle,force> fGF(&get_force); 41 | CTF::Bivar_Kernel<particle,particle,force,get_force> fGF; 42 | 43 | F["i"] += fGF(P["i"],P["j"]); 44 | 45 | Matrix<force> F_all(n, n, NS, dw, gF); 46 | 47 | F_all["ij"] = fGF(P["i"],P["j"]); 48 | 49 | 50 | Vector<> f_mgn(n, dw); 51 | 52 | CTF::Function<force,double> get_mgn([](force f){ return f.fx+f.fy; } ); 53 | 54 | f_mgn["i"] += get_mgn(F_all["ij"]); 55 | -1.0*f_mgn["i"] += get_mgn(F["i"]); 56 | 57 | int pass = (f_mgn.norm2() < 1.E-6); 58 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 59 | 60 | if (dw.rank == 0){ 61 | if (pass){ 62 | printf("{ F[\"i\"] = get_force(P[\"i\"],P[\"j\"]) } passed\n"); 63 | } else { 64 | printf("{ F[\"i\"] = get_force(P[\"i\"],P[\"j\"]) } failed\n"); 65 | } 66 | } 67 | 68 | Transform<force,particle>([] (force f, particle & p){ p.dx += f.fx*p.coeff; p.dy += f.fy*p.coeff; })(F["i"], P["i"]); 69 | 70 | return pass; 71 | } 72 | 73 | 74 | #ifndef TEST_SUITE 75 | 76 | char* getCmdOption(char ** begin, 77 | char ** end, 78 | const std::string & option){ 79 | char ** itr = std::find(begin, end, option); 80 | if (itr != end && ++itr != end){ 81 | return *itr; 82 | } 83 | return 0; 84 | } 85 | 86 | 87 | int main(int argc, char ** argv){ 88 | int rank, np, n; 89 | int const in_num = argc; 90 | char ** input_str = argv; 91 | 92 | MPI_Init(&argc, &argv); 93 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 94 | MPI_Comm_size(MPI_COMM_WORLD, &np); 95 | 96 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 97 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 98 | if (n < 0) n = 5; 99 | } else n = 5; 100 | 101 | 102 | { 103 | World dw(MPI_COMM_WORLD, argc, argv); 104 | 105 | if (rank == 0){ 106 | printf("Computing particle_interaction A_ijkl = f(B_ijkl, A_ijkl)\n"); 107 | } 108 | particle_interaction(n, dw); 109 | } 110 | 111 | 112 | MPI_Finalize(); 113 | return 0; 114 | } 115 | 116 | /** 117 | * @} 118 | * @} 119 | */ 120 | 121 | #endif 122 | -------------------------------------------------------------------------------- /src/interface/sparse_tensor.h: -------------------------------------------------------------------------------- 1 | #ifndef __SPARSE_TENSOR_H__ 2 | #define __SPARSE_TENSOR_H__ 3 | 4 | namespace CTF { 5 | /** 6 | * \defgroup CTF CTF Tensor 7 | * \addtogroup CTF 8 | * @{ 9 | */ 10 | /** 11 | * \brief a sparse subset of a tensor 12 | */ 13 | template<typename dtype> 14 | class Sparse_Tensor { 15 | public: 16 | /** \brief dense tensor whose subset this sparse tensor is of */ 17 | Tensor<dtype> * parent; 18 | /** \brief indices of the sparse elements of this tensor */ 19 | std::vector<int64_t> indices; 20 | /** \brief scaling factor by which to scale the tensor elements */ 21 | dtype scale; 22 | 23 | /** 24 | * \brief base constructor 25 | */ 26 | Sparse_Tensor(); 27 | 28 | /** 29 | * \brief initialize a tensor which corresponds to a set of indices 30 | * \param[in] indices a vector of global indices to tensor values 31 | * \param[in] parent dense distributed tensor to which this sparse tensor belongs to 32 | */ 
33 | Sparse_Tensor(std::vector indices, 34 | Tensor * parent); 35 | 36 | /** 37 | * \brief initialize a tensor which corresponds to a set of indices 38 | * \param[in] n number of values this sparse tensor will have locally 39 | * \param[in] indices an array of global indices to tensor values 40 | * \param[in] parent dense distributed tensor to which this sparse tensor belongs to 41 | */ 42 | Sparse_Tensor(int64_t n, 43 | int64_t * indices, 44 | Tensor * parent); 45 | 46 | /** 47 | * \brief set the sparse set of indices on the parent tensor to values 48 | * forall(j) i = indices[j]; parent[i] = beta*parent[i] + alpha*values[j]; 49 | * \param[in] alpha scaling factor on values array 50 | * \param[in] values data, should be of same size as the number of indices (n) 51 | * \param[in] beta scaling factor to apply to previously existing data 52 | */ 53 | void write(dtype alpha, 54 | dtype * values, 55 | dtype beta); 56 | 57 | // C++ overload special-cases of above method 58 | void operator=(std::vector values); 59 | void operator+=(std::vector values); 60 | void operator-=(std::vector values); 61 | void operator=(dtype * values); 62 | void operator+=(dtype * values); 63 | void operator-=(dtype * values); 64 | 65 | /** 66 | * \brief read the sparse set of indices on the parent tensor to values 67 | * forall(j) i = indices[j]; values[j] = alpha*parent[i] + beta*values[j]; 68 | * \param[in] alpha scaling factor on parent array 69 | * \param[in] values data, should be preallocated to the same size as the number of indices (n) 70 | * \param[in] beta scaling factor to apply to previously existing data in values 71 | */ 72 | void read(dtype alpha, 73 | dtype * values, 74 | dtype beta); 75 | 76 | // C++ overload special-cases of above method 77 | operator std::vector(); 78 | operator dtype*(); 79 | }; 80 | /** 81 | * @} 82 | */ 83 | } 84 | 85 | #include "sparse_tensor.cxx" 86 | #endif 87 | -------------------------------------------------------------------------------- /test/sptensor_sum.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup tests 2 | * @{ 3 | * \defgroup sptensor_sum sptensor_sum 4 | * @{ 5 | * \brief Summation of sparse tensors 6 | */ 7 | 8 | #include 9 | using namespace CTF; 10 | 11 | int sptensor_sum(int n, 12 | World & dw){ 13 | 14 | int shapeN4[] = {NS,NS,NS,NS}; 15 | int sizeN4[] = {n,n,n,n}; 16 | 17 | // Creates distributed sparse tensors initialized with zeros 18 | Tensor<> A(4, true, sizeN4, shapeN4, dw); 19 | Tensor<> B(4, true, sizeN4, shapeN4, dw); 20 | 21 | if (dw.rank == dw.np/2){ 22 | int64_t keys_A[4] = {1,2,4,8}; 23 | double vals_A[4] = {3.2,42.,1.4,-.8}; 24 | 25 | A.write(4, keys_A, vals_A); 26 | 27 | int64_t keys_B[4] = {2,3}; 28 | double vals_B[4] = {24.,7.2}; 29 | 30 | B.write(2, keys_B, vals_B); 31 | } else { 32 | A.write(0, NULL, NULL); 33 | B.write(0, NULL, NULL); 34 | } 35 | 36 | //A.print(); 37 | //B.print(); 38 | 39 | B["abij"] += A["abij"]; 40 | 41 | //B.print(); 42 | 43 | int64_t * new_keys_B; 44 | double * new_vals_B; 45 | int64_t nloc; 46 | B.get_local_data(&nloc, &new_keys_B, &new_vals_B, true); 47 | int pass = 1; 48 | for (int i=0; i 1.E-9) pass = 0; 52 | break; 53 | case 2: 54 | if (fabs(66.-new_vals_B[i]) > 1.E-9) pass = 0; 55 | break; 56 | case 3: 57 | if (fabs(7.2-new_vals_B[i]) > 1.E-9) pass = 0; 58 | break; 59 | case 4: 60 | if (fabs(1.4-new_vals_B[i]) > 1.E-9) pass = 0; 61 | break; 62 | case 8: 63 | if (fabs(-.8-new_vals_B[i]) > 1.E-9) pass = 0; 64 | break; 65 | default: 66 | pass = 0; 67 | 
break; 68 | } 69 | } 70 | free(new_keys_B); 71 | delete [] new_vals_B; 72 | 73 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 74 | if (dw.rank == 0){ 75 | if (pass) 76 | printf("{ B[\"abij\"] += A[\"abij\"] with sparse, A, B } passed \n"); 77 | else 78 | printf("{ B[\"abij\"] += A[\"abij\"] with sparse, A, B } failed\n"); 79 | } 80 | return pass; 81 | } 82 | 83 | 84 | #ifndef TEST_SUITE 85 | char* getCmdOption(char ** begin, 86 | char ** end, 87 | const std::string & option){ 88 | char ** itr = std::find(begin, end, option); 89 | if (itr != end && ++itr != end){ 90 | return *itr; 91 | } 92 | return 0; 93 | } 94 | 95 | 96 | int main(int argc, char ** argv){ 97 | int rank, np, n, pass; 98 | int const in_num = argc; 99 | char ** input_str = argv; 100 | 101 | MPI_Init(&argc, &argv); 102 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 103 | MPI_Comm_size(MPI_COMM_WORLD, &np); 104 | 105 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 106 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 107 | if (n < 0) n = 7; 108 | } else n = 7; 109 | 110 | 111 | { 112 | World dw(argc, argv); 113 | 114 | if (rank == 0){ 115 | printf("Computing B+=A with B, A sparse\n"); 116 | } 117 | pass = sptensor_sum(n, dw); 118 | assert(pass); 119 | } 120 | 121 | MPI_Finalize(); 122 | return 0; 123 | } 124 | /** 125 | * @} 126 | * @} 127 | */ 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /src/contraction/spctr_offload.h: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | #ifndef __SPCTR_OFFLOAD_H__ 4 | #define __SPCTR_OFFLOAD_H__ 5 | 6 | #include "../shared/offload.h" 7 | #include "spctr_tsr.h" 8 | 9 | namespace CTF_int { 10 | #ifdef OFFLOAD 11 | class spctr_offload : public spctr { 12 | public: 13 | /* Class to be called on sub-blocks */ 14 | spctr * rec_ctr; 15 | int iter_counter; 16 | int total_iter; 17 | int upload_phase_A; 18 | int upload_phase_B; 19 | int download_phase_C; 20 | int64_t size_A; /* size of A blocks */ 21 | int64_t size_B; /* size of B blocks */ 22 | int64_t size_C; /* size of C blocks */ 23 | offload_arr * spr_A; 24 | offload_arr * spr_B; 25 | offload_arr * spr_C; 26 | 27 | /** 28 | * \brief print ctr object 29 | */ 30 | void print(); 31 | 32 | /** 33 | * \brief offloads and downloads local blocks of dense or CSR tensors 34 | */ 35 | void run(char * A, int nblk_A, int64_t const * size_blk_A, 36 | char * B, int nblk_B, int64_t const * size_blk_B, 37 | char * C, int nblk_C, int64_t * size_blk_C, 38 | char *& new_C); 39 | 40 | /** 41 | * \brief returns the number of bytes of buffer space 42 | we need 43 | * \return bytes needed 44 | */ 45 | int64_t spmem_fp(); 46 | 47 | /** 48 | * \brief returns the number of bytes of buffer space we need recursively 49 | * \return bytes needed for recursive contraction 50 | */ 51 | int64_t mem_rec(double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 52 | 53 | /** 54 | * \brief returns the time this kernel will take excluding calls to rec_ctr 55 | * \return seconds needed 56 | */ 57 | double est_time_fp(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 58 | 59 | /** 60 | * \brief returns the time this kernel will take including calls to rec_ctr 61 | * \return seconds needed for recursive contraction 62 | */ 63 | double est_time_rec(int nlyr, double nnz_frac_A, double nnz_frac_B, double nnz_frac_C); 64 | 65 | spctr * clone(); 66 | 67 | 
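/**
 * \brief constructor that copies the parameters of another spctr_offload object (presumably the duplication performed by clone())
 * \param[in] other spctr_offload object whose fields are copied
 */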
spctr_offload(spctr * other); 68 | 69 | /** 70 | * \brief deallocates spctr_offload object 71 | */ 72 | ~spctr_offload(); 73 | 74 | /** 75 | * \brief allocates spctr_offload object 76 | * \param[in] c contraction object 77 | * \param[in] size_A size of the A tensor 78 | * \param[in] size_B size of the B tensor 79 | * \param[in] size_C size of the C tensor 80 | * \param[in] total_iter number of gemms to be done 81 | * \param[in] upload_phase_A period in iterations with which to upload A 82 | * \param[in] upload_phase_B period in iterations with which to upload B 83 | * \param[in] download_phase_C period in iterations with which to download C 84 | */ 85 | spctr_offload(contraction const * c, 86 | int64_t size_A, 87 | int64_t size_B, 88 | int64_t size_C, 89 | int total_iter, 90 | int upload_phase_A, 91 | int upload_phase_B, 92 | int download_phase_C); 93 | 94 | }; 95 | #endif 96 | 97 | } 98 | #endif 99 | -------------------------------------------------------------------------------- /src_python/ctf_ext.h: -------------------------------------------------------------------------------- 1 | 2 | #include "../include/ctf.hpp" 3 | 4 | namespace CTF_int{ 5 | 6 | 7 | /** 8 | * \python absolute value function 9 | * \param[in] A tensor, param[in,out] B tensor (becomes absolute value of A) 10 | * \return None 11 | */ 12 | template <typename dtype> 13 | void abs_helper(tensor * A, tensor * B); 14 | 15 | /** 16 | * \python pow function 17 | * \param[in] A tensor, param[in] B tensor, param[in,out] C tensor, param[in] index of A, param[in] index of B, param[in] index of C 18 | * \return None 19 | */ 20 | template <typename dtype> 21 | void pow_helper(tensor * A, tensor * B, tensor * C, char const * idx_A, char const * idx_B, char const * idx_C); 22 | 23 | /** 24 | * \python all function 25 | * \param[in] A tensor, param[in] B tensor with bool values created, param[in] index of A, param[in] index of B 26 | * \return None 27 | */ 28 | template <typename dtype> 29 | void all_helper(tensor * A, tensor * B_bool, char const * idx_A, char const * idx_B); 30 | 31 | template <typename dtype> 32 | void conj_helper(tensor * A, tensor * B); 33 | 34 | /** 35 | * \python function that gets the real part from complex numbers 36 | * \param[in] A tensor, param[in] B tensor stores the real part from tensor A 37 | * \return None 38 | */ 39 | template <typename dtype> 40 | void get_real(tensor * A, tensor * B); 41 | 42 | /** 43 | * \python function that gets the imaginary part from complex numbers 44 | * \param[in] A tensor, param[in] B tensor stores the imaginary part from tensor A 45 | * \return None 46 | */ 47 | template <typename dtype> 48 | void get_imag(tensor * A, tensor * B); 49 | 50 | /** 51 | * \python function that sets the real part from complex numbers 52 | * \param[in] A tensor, param[in] B tensor stores the real part from tensor A 53 | * \return None 54 | */ 55 | template <typename dtype> 56 | void set_real(tensor * A, tensor * B); 57 | 58 | /** 59 | * \python function that sets the imaginary part from complex numbers 60 | * \param[in] A tensor, param[in] B tensor stores the imaginary part from tensor A 61 | * \return None 62 | */ 63 | template <typename dtype> 64 | void set_imag(tensor * A, tensor * B); 65 | 66 | /** 67 | * \python any function 68 | * \param[in] A tensor, param[in] B tensor with bool values created, param[in] index of A, param[in] index of B 69 | * \return None 70 | */ 71 | template <typename dtype> 72 | void any_helper(tensor * A, tensor * B_bool, char const * idx_A, char const * idx_B); 73 | /** 74 | * \brief sum all 1 values in boolean tensor 75 | * \param[in] A tensor of boolean values 76 | * \return number of 1s in A 77 | */ 78 | int64_t 
sum_bool_tsr(tensor * A); 79 | 80 | void matrix_svd(tensor * A, tensor * U, tensor * S, tensor * VT, int rank); 81 | void matrix_svd_cmplx(tensor * A, tensor * U, tensor * S, tensor * VT, int rank); 82 | 83 | void matrix_qr(tensor * A, tensor * Q, tensor * R); 84 | void matrix_qr_cmplx(tensor * A, tensor * Q, tensor * R); 85 | 86 | /** 87 | * \brief convert tensor from one type to another 88 | * \param[in] type_idx1 index of first ype 89 | * \param[in] type_idx2 index of second ype 90 | * \param[in] A tensor to convert 91 | * \param[in] B tensor to convert to 92 | */ 93 | void conv_type(int type_idx1, int type_idx2, tensor * A, tensor * B); 94 | 95 | } 96 | -------------------------------------------------------------------------------- /test/dft.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup tests 4 | * @{ 5 | * \defgroup DFT DFT 6 | * @{ 7 | * \brief Discrete Fourier Transform by matrix multiplication 8 | */ 9 | 10 | #include 11 | using namespace CTF; 12 | 13 | int test_dft(int64_t n, 14 | World &wrld){ 15 | int numPes, myRank; 16 | int64_t np, i; 17 | int64_t * idx; 18 | std::complex * data; 19 | std::complex imag(0,1); 20 | MPI_Comm_size(MPI_COMM_WORLD, &numPes); 21 | MPI_Comm_rank(MPI_COMM_WORLD, &myRank); 22 | Matrix < std::complex >DFT(n, n, SY, wrld, "DFT", 1); 23 | Matrix < std::complex >IDFT(n, n, SY, wrld, "IDFT", 0); 24 | 25 | DFT.get_local_data(&np, &idx, &data); 26 | 27 | for (i=0; i (1.0, 0.0), DFT, "ij", IDFT, "jk", 47 | std::complex (0.0, 0.0), "ik");*/ 48 | DFT["ik"] = .5*DFT["ij"]*IDFT["jk"]; 49 | 50 | Scalar< std::complex > ss(wrld); 51 | ss[""] = Function< std::complex, std::complex, std::complex >([](std::complex a, std::complex b){ return a+b; })(DFT["ij"],DFT["ij"]); 52 | 53 | DFT.get_local_data(&np, &idx, &data); 54 | int pass = 1; 55 | //DFT.print(stdout); 56 | for (i=0; i=1.E-9) 60 | pass = 0; 61 | } else { 62 | if (fabs(data[i].real())>=1.E-9) 63 | pass = 0; 64 | } 65 | } 66 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 67 | 68 | if (myRank == 0) { 69 | MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 70 | if (pass) 71 | printf("{ DFT[\"ik\"] = DFT[\"ij\"]*IDFT[\"jk\"] } passed\n"); 72 | else 73 | printf("{ DFT[\"ik\"] = DFT[\"ij\"]*IDFT[\"jk\"] } failed\n"); 74 | } else 75 | MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 76 | 77 | MPI_Barrier(MPI_COMM_WORLD); 78 | 79 | free(idx); 80 | delete [] data; 81 | return pass; 82 | } 83 | 84 | #ifndef TEST_SUITE 85 | /** 86 | * \brief Forms N-by-N DFT matrix A and inverse-dft iA and checks A*iA=I 87 | */ 88 | int main(int argc, char ** argv){ 89 | int logn; 90 | int64_t n; 91 | 92 | MPI_Init(&argc, &argv); 93 | 94 | if (argc > 1){ 95 | logn = atoi(argv[1]); 96 | if (logn<0) logn = 5; 97 | } else { 98 | logn = 5; 99 | } 100 | n = 1< 11 | using namespace CTF; 12 | 13 | void f3(double a, double b, double & c){ 14 | c = a*c*a+b*c*b; 15 | } 16 | 17 | int bivar_transform(int n, 18 | World & dw){ 19 | 20 | int shapeN4[] = {NS,NS,NS,NS}; 21 | int sizeN4[] = {n+1,n,n+2,n+3}; 22 | 23 | Tensor<> A(4, sizeN4, shapeN4, dw); 24 | Tensor<> B(4, sizeN4, shapeN4, dw); 25 | Tensor<> C(4, sizeN4, shapeN4, dw); 26 | 27 | srand48(dw.rank); 28 | A.fill_random(-.5, .5); 29 | B.fill_random(-.5, .5); 30 | C.fill_random(-.5, .5); 31 | 32 | 33 | double * all_start_data_A; 34 | int64_t nall_A; 35 | A.read_all(&nall_A, &all_start_data_A); 36 | 
double * all_start_data_B; 37 | int64_t nall_B; 38 | B.read_all(&nall_B, &all_start_data_B); 39 | double * all_start_data_C; 40 | int64_t nall_C; 41 | C.read_all(&nall_C, &all_start_data_C); 42 | 43 | CTF::Transform<> bfun([](double a, double b, double & c){ c = a*c*a + b*c*b; }); 44 | bfun(A["ijkl"],B["ijkl"],C["ijkl"]); 45 | 46 | double * all_end_data_C; 47 | int64_t nall2_C; 48 | C.read_all(&nall2_C, &all_end_data_C); 49 | 50 | int pass = (nall_C == nall2_C); 51 | if (pass){ 52 | for (int64_t i=0; i=1.E-6){ 56 | pass =0; 57 | printf(" %lf %lf %lf %lf %lf\n",all_start_data_A[i],all_start_data_B[i],all_start_data_C[i],k,all_end_data_C[i]); 58 | } 59 | } 60 | } 61 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 62 | 63 | if (dw.rank == 0){ 64 | if (pass){ 65 | printf("{ f3(A[\"ijkl\"], B[\"ijkl\"], C[\"ijkl\"]) } passed\n"); 66 | } else { 67 | printf("{ f3(A[\"ijkl\"], B[\"ijkl\"], C[\"ijkl\"]) } failed\n"); 68 | } 69 | } 70 | 71 | delete [] all_start_data_A; 72 | delete [] all_start_data_B; 73 | delete [] all_start_data_C; 74 | delete [] all_end_data_C; 75 | 76 | return pass; 77 | } 78 | 79 | 80 | #ifndef TEST_SUITE 81 | 82 | char* getCmdOption(char ** begin, 83 | char ** end, 84 | const std::string & option){ 85 | char ** itr = std::find(begin, end, option); 86 | if (itr != end && ++itr != end){ 87 | return *itr; 88 | } 89 | return 0; 90 | } 91 | 92 | 93 | int main(int argc, char ** argv){ 94 | int rank, np, n; 95 | int const in_num = argc; 96 | char ** input_str = argv; 97 | 98 | MPI_Init(&argc, &argv); 99 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 100 | MPI_Comm_size(MPI_COMM_WORLD, &np); 101 | 102 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 103 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 104 | if (n < 0) n = 5; 105 | } else n = 5; 106 | 107 | 108 | { 109 | World dw(MPI_COMM_WORLD, argc, argv); 110 | 111 | if (rank == 0){ 112 | printf("Computing bivar_transform A_ijkl = f(A_ijkl)\n"); 113 | } 114 | bivar_transform(n, dw); 115 | } 116 | 117 | 118 | MPI_Finalize(); 119 | return 0; 120 | } 121 | 122 | /** 123 | * @} 124 | * @} 125 | */ 126 | 127 | #endif 128 | -------------------------------------------------------------------------------- /src/shared/blas_symbs.cxx: -------------------------------------------------------------------------------- 1 | #include "blas_symbs.h" 2 | #include "util.h" 3 | namespace CTF_BLAS { 4 | template 5 | void gemm(const char *, 6 | const char *, 7 | const int *, 8 | const int *, 9 | const int *, 10 | const dtype *, 11 | const dtype *, 12 | const int *, 13 | const dtype *, 14 | const int *, 15 | const dtype *, 16 | dtype *, 17 | const int *){ 18 | printf("CTF ERROR GEMM not available for this type.\n"); 19 | ASSERT(0); 20 | assert(0); 21 | } 22 | #define INST_GEMM(dtype,s) \ 23 | template <> \ 24 | void gemm(const char * a, \ 25 | const char * b, \ 26 | const int * c, \ 27 | const int * d, \ 28 | const int * e, \ 29 | const dtype * f, \ 30 | const dtype * g, \ 31 | const int * h, \ 32 | const dtype * i, \ 33 | const int * j, \ 34 | const dtype * k, \ 35 | dtype * l, \ 36 | const int * m){ \ 37 | s ## GEMM(a,b,c,d,e,f,g,h,i,j,k,l,m); \ 38 | } 39 | INST_GEMM(float,S) 40 | INST_GEMM(double,D) 41 | INST_GEMM(std::complex,C) 42 | INST_GEMM(std::complex,Z) 43 | #undef INST_GEMM 44 | 45 | 46 | #ifdef USE_BATCH_GEMM 47 | template 48 | void gemm_batch(const char *, 49 | const char *, 50 | const int *, 51 | const int *, 52 | const int *, 53 | const dtype *, 54 | dtype **, 55 | const int *, 56 | dtype **, 57 | const 
int *, 58 | const dtype *, 59 | dtype **, 60 | const int *, 61 | const int *, 62 | const int *){ 63 | printf("CTF ERROR gemm_batch not available for this type.\n"); 64 | ASSERT(0); 65 | assert(0); 66 | } 67 | 68 | #define INST_GEMM_BATCH(dtype,s) \ 69 | template <> \ 70 | void gemm_batch(const char * a, \ 71 | const char * b, \ 72 | const int * c, \ 73 | const int * d, \ 74 | const int * e, \ 75 | const dtype * f, \ 76 | dtype ** g, \ 77 | const int * h, \ 78 | dtype ** i, \ 79 | const int * j, \ 80 | const dtype * k, \ 81 | dtype ** l, \ 82 | const int * m, \ 83 | const int * n, \ 84 | const int * o){ \ 85 | s ## GEMM_BATCH(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o); \ 86 | } 87 | INST_GEMM_BATCH(float,S) 88 | INST_GEMM_BATCH(double,D) 89 | INST_GEMM_BATCH(std::complex,C) 90 | INST_GEMM_BATCH(std::complex,Z) 91 | #endif 92 | } 93 | #undef INST_GEMM_BATCH 94 | -------------------------------------------------------------------------------- /src/redistribution/dgtog_redist.cxx: -------------------------------------------------------------------------------- 1 | 2 | 3 | #include "dgtog_calc_cnt.h" 4 | #include "dgtog_redist.h" 5 | #include "../shared/util.h" 6 | #include "dgtog_bucket.h" 7 | namespace CTF_int { 8 | //static double init_mdl[] = {COST_LATENCY, COST_LATENCY, COST_NETWBW}; 9 | LinModel<3> dgtog_res_mdl(dgtog_res_mdl_init,"dgtog_res_mdl"); 10 | 11 | double dgtog_est_time(int64_t tot_sz, int np){ 12 | double ps[] = {1.0, (double)log2(np), (double)tot_sz*log2(np)}; 13 | return dgtog_res_mdl.est_time(ps); 14 | } 15 | } 16 | 17 | #define MTAG 777 18 | namespace CTF_redist_noror { 19 | #include "dgtog_redist_ror.h" 20 | } 21 | 22 | namespace CTF_redist_ror { 23 | #define ROR 24 | #include "dgtog_redist_ror.h" 25 | #undef ROR 26 | } 27 | 28 | namespace CTF_redist_ror_isr { 29 | #define ROR 30 | #define IREDIST 31 | #include "dgtog_redist_ror.h" 32 | #undef IREDIST 33 | #undef ROR 34 | } 35 | 36 | namespace CTF_redist_ror_put { 37 | #define ROR 38 | #define PUTREDIST 39 | #include "dgtog_redist_ror.h" 40 | #undef PUTREDIST 41 | #undef ROR 42 | } 43 | 44 | namespace CTF_redist_ror_isr_any { 45 | #define ROR 46 | #define IREDIST 47 | #define WAITANY 48 | #include "dgtog_redist_ror.h" 49 | #undef WAITANY 50 | #undef IREDIST 51 | #undef ROR 52 | } 53 | 54 | #ifdef USE_FOMPI 55 | namespace CTF_redist_ror_put_any { 56 | #define ROR 57 | #define IREDIST 58 | #define PUTREDIST 59 | #define WAITANY 60 | #define PUT_NOTIFY 61 | #include "dgtog_redist_ror.h" 62 | #undef PUT_NOTIFY 63 | #undef WAITANY 64 | #undef PUTREDIST 65 | #undef IREDIST 66 | #undef ROR 67 | } 68 | #endif 69 | 70 | 71 | namespace CTF_int { 72 | 73 | 74 | void dgtog_reshuffle(int const * sym, 75 | int const * edge_len, 76 | distribution const & old_dist, 77 | distribution const & new_dist, 78 | char ** ptr_tsr_data, 79 | char ** ptr_tsr_new_data, 80 | algstrct const * sr, 81 | CommData ord_glb_comm){ 82 | switch (CTF::DGTOG_SWITCH){ 83 | case 0: 84 | CTF_redist_noror::dgtog_reshuffle(sym, edge_len, old_dist, new_dist, ptr_tsr_data, ptr_tsr_new_data, sr, ord_glb_comm); 85 | break; 86 | case 1: 87 | CTF_redist_ror::dgtog_reshuffle(sym, edge_len, old_dist, new_dist, ptr_tsr_data, ptr_tsr_new_data, sr, ord_glb_comm); 88 | break; 89 | case 2: 90 | CTF_redist_ror_isr::dgtog_reshuffle(sym, edge_len, old_dist, new_dist, ptr_tsr_data, ptr_tsr_new_data, sr, ord_glb_comm); 91 | break; 92 | case 3: 93 | CTF_redist_ror_put::dgtog_reshuffle(sym, edge_len, old_dist, new_dist, ptr_tsr_data, ptr_tsr_new_data, sr, ord_glb_comm); 94 | break; 95 | case 4: 96 
| CTF_redist_ror_isr_any::dgtog_reshuffle(sym, edge_len, old_dist, new_dist, ptr_tsr_data, ptr_tsr_new_data, sr, ord_glb_comm); 97 | break; 98 | #ifdef USE_FOMPI 99 | case 5: 100 | CTF_redist_ror_put_any::dgtog_reshuffle(sym, edge_len, old_dist, new_dist, ptr_tsr_data, ptr_tsr_new_data, sr, ord_glb_comm); 101 | break; 102 | #else 103 | case 5: 104 | if (ord_glb_comm.rank == 0) printf("FOMPI needed for this redistribution, ABORTING\n"); 105 | assert(0); 106 | break; 107 | #endif 108 | default: 109 | assert(0); 110 | break; 111 | } 112 | 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /examples/spectral_element.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup examples 2 | * @{ 3 | * \defgroup spectral spectral 4 | * @{ 5 | * \brief Spectral element methods test/benchmark 6 | */ 7 | #include 8 | using namespace CTF; 9 | 10 | /** 11 | * \brief computes the following kernel of the spectral element method 12 | * Given u, D, and diagonal matrices G_{xy} for x,y in [1,3], 13 | * let E_1 = I x I x D, E_2 = I x D x I, E_3 = D x I x I 14 | * [E_1^T, E_2^t, E_3^T] * [G_{11}, G_{12}, G_{13}] * [E_1] * u 15 | * [G_{21}, G_{22}, G_{23}] [E_2] 16 | * [G_{31}, G_{32}, G_{33}] [E_3] 17 | */ 18 | int spectral(int n, 19 | World & dw){ 20 | int lens_u[] = {n, n, n}; 21 | 22 | Tensor<> u(3, lens_u); 23 | Matrix<> D(n, n); 24 | u.fill_random(0.0,1.0); 25 | D.fill_random(0.0,1.0); 26 | 27 | Tensor<> ** G; 28 | G = (Tensor<>**)malloc(sizeof(Tensor<>*)*3); 29 | for (int a=0; a<3; a++){ 30 | G[a] = new Tensor<>[3]; 31 | for (int b=0; b<3; b++){ 32 | G[a][b] = Tensor<>(3, lens_u); 33 | G[a][b].fill_random(0.0,1.0); 34 | } 35 | } 36 | 37 | Tensor<> * w = new Tensor<>[3]; 38 | Tensor<> * z = new Tensor<>[3]; 39 | for (int a=0; a<3; a++){ 40 | w[a] = Tensor<>(3, lens_u); 41 | z[a] = Tensor<>(3, lens_u); 42 | } 43 | 44 | double st_time = MPI_Wtime(); 45 | 46 | w[0]["ijk"] = D["kl"]*u["ijl"]; 47 | w[1]["ijk"] = D["jl"]*u["ilk"]; 48 | w[2]["ijk"] = D["il"]*u["ljk"]; 49 | 50 | for (int a=0; a<3; a++){ 51 | for (int b=0; b<3; b++){ 52 | z[a]["ijk"] += G[a][b]["ijk"]*w[b]["ijk"]; 53 | } 54 | } 55 | 56 | u["ijk"] = D["lk"]*z[0]["ijl"]; 57 | u["ijk"] += D["lj"]*z[1]["ilk"]; 58 | u["ijk"] += D["li"]*z[2]["ljk"]; 59 | 60 | double exe_time = MPI_Wtime() - st_time; 61 | 62 | bool pass = u.norm2() >= 1.E-6; 63 | 64 | for (int a=0; a<3; a++){ 65 | delete [] G[a]; 66 | } 67 | free(G); 68 | delete [] w; 69 | delete [] z; 70 | 71 | if (dw.rank == 0){ 72 | if (pass) 73 | printf("{ Spectral element method } passed \n"); 74 | else 75 | printf("{ spectral element method } failed \n"); 76 | #ifndef TEST_SUITE 77 | printf("Spectral element method on %d*%d*%d grid with %d processors took %lf seconds\n", n,n,n,dw.np,exe_time); 78 | #endif 79 | } 80 | return pass; 81 | } 82 | 83 | 84 | #ifndef TEST_SUITE 85 | char* getCmdOption(char ** begin, 86 | char ** end, 87 | const std::string & option){ 88 | char ** itr = std::find(begin, end, option); 89 | if (itr != end && ++itr != end){ 90 | return *itr; 91 | } 92 | return 0; 93 | } 94 | 95 | 96 | int main(int argc, char ** argv){ 97 | int rank, np, n, pass; 98 | int const in_num = argc; 99 | char ** input_str = argv; 100 | 101 | MPI_Init(&argc, &argv); 102 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 103 | MPI_Comm_size(MPI_COMM_WORLD, &np); 104 | 105 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 106 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 107 | if (n < 0) n = 16; 
108 | } else n = 16; 109 | 110 | { 111 | World dw(argc, argv); 112 | 113 | if (rank == 0){ 114 | printf("Running 3D spectral element method with %d*%d*%d grid\n",n,n,n); 115 | } 116 | pass = spectral(n, dw); 117 | assert(pass); 118 | } 119 | 120 | MPI_Finalize(); 121 | return 0; 122 | } 123 | /** 124 | * @} 125 | * @} 126 | */ 127 | 128 | #endif 129 | -------------------------------------------------------------------------------- /scalapack_tests/qr.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup tests 2 | * @{ 3 | * \defgroup qr qr 4 | * @{ 5 | * \brief QR factorization of CTF matrices 6 | */ 7 | 8 | #include <ctf.hpp> 9 | #include "conj.h" 10 | using namespace CTF; 11 | 12 | 13 | template <typename dtype> 14 | bool qr(Matrix<dtype> A, 15 | int m, 16 | int n, 17 | World & dw){ 18 | 19 | // Perform QR 20 | Matrix<dtype> Q,R; 21 | A.qr(Q,R); 22 | 23 | // Test orthogonality 24 | Matrix<dtype> E(n,n,dw); 25 | 26 | E["ii"] = 1.; 27 | 28 | E["ij"] -= Q["ki"]*conj(Q)["kj"]; 29 | 30 | bool pass_orthogonality = true; 31 | 32 | double nrm; 33 | E.norm2(nrm); 34 | if (nrm > m*n*1.E-6){ 35 | pass_orthogonality = false; 36 | } 37 | 38 | A["ij"] -= Q["ik"]*R["kj"]; 39 | 40 | bool pass_residual = true; 41 | A.norm2(nrm); 42 | if (nrm > m*n*n*1.E-6){ 43 | pass_residual = false; 44 | } 45 | 46 | #ifndef TEST_SUITE 47 | if (dw.rank == 0){ 48 | printf("QR orthogonality check returned %d, residual check %d\n", pass_orthogonality, pass_residual); 49 | } 50 | #endif 51 | return pass_residual & pass_orthogonality; 52 | } 53 | 54 | bool test_qr(int m, int n, World dw){ 55 | bool pass = true; 56 | Matrix<float> A(m,n,dw); 57 | Matrix<float> AA(m,n,dw); 58 | A.fill_random(0.,1.); 59 | AA.fill_random(0.,1.); 60 | pass = pass & qr(A,m,n,dw); 61 | 62 | Matrix<double> B(m,n,dw); 63 | Matrix<double> BB(m,n,dw); 64 | B.fill_random(0.,1.); 65 | BB.fill_random(0.,1.); 66 | pass = pass & qr(B,m,n,dw); 67 | 68 | Matrix<std::complex<float>> cA(m,n,dw); 69 | cA["ij"] = Function<float,float,std::complex<float>>([](float a, float b){ return std::complex<float>(a,b); })(A["ij"],AA["ij"]); 70 | pass = pass & qr<std::complex<float>>(cA,m,n,dw); 71 | 72 | 73 | Matrix<std::complex<double>> cB(m,n,dw); 74 | cB["ij"] = Function<double,double,std::complex<double>>([](double a, double b){ return std::complex<double>(a,b); })(B["ij"],BB["ij"]); 75 | pass = pass & qr<std::complex<double>>(cB,m,n,dw); 76 | if (dw.rank == 0){ 77 | if (pass){ 78 | printf("{ A = QR and Q^TQ = I } passed\n"); 79 | } else { 80 | printf("{ A = QR and Q^TQ = I } failed\n"); 81 | } 82 | } 83 | 84 | 85 | return pass; 86 | } 87 | 88 | #ifndef TEST_SUITE 89 | char* getCmdOption(char ** begin, 90 | char ** end, 91 | const std::string & option){ 92 | char ** itr = std::find(begin, end, option); 93 | if (itr != end && ++itr != end){ 94 | return *itr; 95 | } 96 | return 0; 97 | } 98 | 99 | 100 | int main(int argc, char ** argv){ 101 | int rank, np, m, n, pass; 102 | int const in_num = argc; 103 | char ** input_str = argv; 104 | 105 | MPI_Init(&argc, &argv); 106 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 107 | MPI_Comm_size(MPI_COMM_WORLD, &np); 108 | 109 | if (getCmdOption(input_str, input_str+in_num, "-m")){ 110 | m = atoi(getCmdOption(input_str, input_str+in_num, "-m")); 111 | if (m < 0) m = 13; 112 | } else m = 13; 113 | 114 | 115 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 116 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 117 | if (n < 0) n = 7; 118 | } else n = 7; 119 | 120 | 121 | { 122 | World dw(argc, argv); 123 | 124 | if (rank == 0){ 125 | printf("Testing %d-by-%d QR factorization\n", m, n); 126 | } 127 | pass = test_qr(m, n, dw); 128 | assert(pass); 129 | } 130 | 131 | MPI_Finalize(); 132 | return 0; 133 | } 134 | 
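// Illustrative usage (assuming an MPI launcher and that this test was built into a binary named qr;
// -m and -n default to 13 and 7 as parsed above):
//   mpirun -np 4 ./qr -m 13 -n 7
// This factors a random 13-by-7 matrix in each of the four scalar types exercised by test_qr and
// checks Q^T Q = I (conjugate-transposed for the complex cases) as well as the residual A - QR.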
/** 135 | * @} 136 | * @} 137 | */ 138 | 139 | #endif 140 | -------------------------------------------------------------------------------- /examples/dft_3D.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | 3 | /** \addtogroup examples 4 | * @{ 5 | * \defgroup DFT_3D DFT_3D 6 | * @{ 7 | * \brief 3D Discrete Fourier Transform by tensor contractions 8 | */ 9 | 10 | 11 | #include 12 | using namespace CTF; 13 | 14 | 15 | int test_dft_3D(int n, 16 | World & wrld){ 17 | int myRank, numPes; 18 | int i, j; 19 | int64_t np; 20 | int64_t * idx; 21 | std::complex * data; 22 | std::complex imag(0,1); 23 | 24 | int len[] = {n,n,n}; 25 | int sym[] = {NS,NS,NS}; 26 | 27 | MPI_Comm_size(MPI_COMM_WORLD, &numPes); 28 | MPI_Comm_rank(MPI_COMM_WORLD, &myRank); 29 | 30 | CTF::Ring< std::complex > ldr; 31 | 32 | Matrix < std::complex >DFT(n, n, SY, wrld, ldr); 33 | Matrix < std::complex >IDFT(n, n, SY, wrld, ldr); 34 | Tensor < std::complex >MESH(3, len, sym, wrld, ldr); 35 | 36 | DFT.get_local_data(&np, &idx, &data); 37 | 38 | for (i=0; i=1.E-9) pass = 0; 76 | } else { 77 | if (fabs((double)data[i].real())>=1.E-9) pass = 0; 78 | } 79 | } 80 | MPI_Allreduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); 81 | 82 | if (myRank == 0){ 83 | MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 84 | if (pass) 85 | printf("{ MESH[\"ijk\"] = MESH[\"pqr\"]*DFT[\"ip\"]*DFT[\"jq\"]*DFT[\"kr\"] } passed\n"); 86 | else 87 | printf("{ MESH[\"ijk\"] = MESH[\"pqr\"]*DFT[\"ip\"]*DFT[\"jq\"]*DFT[\"kr\"] } failed\n"); 88 | } else 89 | MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 90 | 91 | MPI_Barrier(MPI_COMM_WORLD); 92 | 93 | 94 | free(idx); 95 | delete [] data; 96 | return pass; 97 | } 98 | 99 | #ifndef TEST_SUITE 100 | /** 101 | * \brief Forms N-by-N DFT matrix A and inverse-dft iA and checks A*iA=I 102 | */ 103 | int main(int argc, char ** argv){ 104 | int logn; 105 | int64_t n; 106 | MPI_Init(&argc, &argv); 107 | 108 | if (argc > 1){ 109 | logn = atoi(argv[1]); 110 | if (logn<0) logn = 3; 111 | } else { 112 | logn = 3; 113 | } 114 | n = 1< 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include "../src/shared/util.h" 18 | 19 | int bench_contraction(int n, 20 | int niter, 21 | char const * iA, 22 | char const * iB, 23 | char const * iC, 24 | CTF_World &dw){ 25 | 26 | int rank, i, num_pes; 27 | 28 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 29 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 30 | 31 | int order_A, order_B, order_C; 32 | order_A = strlen(iA); 33 | order_B = strlen(iB); 34 | order_C = strlen(iC); 35 | 36 | int NS_A[order_A]; 37 | int NS_B[order_B]; 38 | int NS_C[order_C]; 39 | int n_A[order_A]; 40 | int n_B[order_B]; 41 | int n_C[order_C]; 42 | 43 | for (i=0; i 11 | using namespace CTF; 12 | 13 | struct cust_type { 14 | char name[256]; 15 | int len_name; 16 | }; 17 | 18 | cust_type cadd(cust_type a, cust_type b){ 19 | if (strlen(a.name) >= strlen(b.name)) return a; 20 | else return b; 21 | } 22 | 23 | void mpi_cadd(void * a, void * b, int * len, MPI_Datatype * d){ 24 | for (int i=0; i<*len; i++){ 25 | ((cust_type*)b)[i] = cadd(((cust_type*)a)[i], ((cust_type*)b)[i]); 26 | 27 | } 28 | } 29 | 30 | int endomorphism_cust(int n, 31 | World & dw){ 32 | 33 | int shapeN4[] = {NS,NS,NS,NS}; 34 | int sizeN4[] = {n+1,n,n+2,n+3}; 35 | 36 | cust_type addid; 37 | addid.name[0] = '\0'; 38 | addid.len_name = 0; 39 | 40 
| MPI_Op mop; 41 | MPI_Op_create(&mpi_cadd, 1, &mop); 42 | 43 | Monoid m = Monoid(addid, &cadd, mop); 44 | 45 | Tensor A(4, sizeN4, shapeN4, dw, m); 46 | 47 | int64_t * inds; 48 | cust_type * vals; 49 | int64_t nvals; 50 | 51 | A.get_local_data(&nvals, &inds, &vals); 52 | 53 | srand48(dw.rank); 54 | for (int64_t i=0; i endo( 63 | [](cust_type & a){ 64 | a.len_name = strlen(a.name); 65 | }); 66 | // below is equivalent to A.scale(NULL, "ijkl", endo); 67 | endo(A["ijkl"]); 68 | 69 | 70 | int64_t * indices; 71 | cust_type * loc_data; 72 | int64_t nloc; 73 | A.get_local_data(&nloc, &indices, &loc_data); 74 | 75 | int pass = 1; 76 | if (pass){ 77 | for (int64_t i=0; i 90 | void offload_gemm(char tA, 91 | char tB, 92 | int m, 93 | int n, 94 | int k, 95 | dtype alpha, 96 | offload_tsr & A, 97 | int lda_A, 98 | offload_tsr & B, 99 | int lda_B, 100 | dtype beta, 101 | offload_tsr & C, 102 | int lda_C); 103 | 104 | template 105 | void offload_gemm(char tA, 106 | char tB, 107 | int m, 108 | int n, 109 | int k, 110 | dtype alpha, 111 | dtype const * dev_A, 112 | int lda_A, 113 | dtype const * dev_B, 114 | int lda_B, 115 | dtype beta, 116 | dtype * dev_C, 117 | int lda_C); 118 | } 119 | #endif 120 | 121 | -------------------------------------------------------------------------------- /examples/trace.cxx: -------------------------------------------------------------------------------- 1 | /*Copyright (c) 2011, Edgar Solomonik, all rights reserved.*/ 2 | /** \addtogroup examples 3 | * @{ 4 | * \defgroup trace trace 5 | * @{ 6 | * \brief tests trace over diagonal of Matrices 7 | */ 8 | 9 | #include 10 | using namespace CTF; 11 | 12 | int trace(int const n, 13 | World &dw){ 14 | int rank, i, num_pes; 15 | int64_t np; 16 | double * pairs; 17 | double tr1, tr2, tr3, tr4; 18 | int64_t * indices; 19 | 20 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 21 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 22 | 23 | Matrix<> A(n, n, NS, dw); 24 | Matrix<> B(n, n, NS, dw); 25 | Matrix<> C(n, n, NS, dw); 26 | Matrix<> D(n, n, NS, dw); 27 | Matrix<> C1(n, n, NS, dw); 28 | Matrix<> C2(n, n, NS, dw); 29 | Matrix<> C3(n, n, NS, dw); 30 | Matrix<> C4(n, n, NS, dw); 31 | Vector<> DIAG(n, dw); 32 | 33 | srand48(13*rank); 34 | 35 | A.get_local_data(&np, &indices, &pairs); 36 | for (i=0; i1.E-10 || fabs(tr2-tr3)/tr2>1.E-10 || fabs(tr3-tr4)/tr3>1.E-10){ 79 | pass = 0; 80 | } 81 | if (!pass){ 82 | printf("{ tr(ABCD) = tr(DABC) = tr(CDAB) = tr(BCDA) } failed\n"); 83 | } else { 84 | printf("{ tr(ABCD) = tr(DABC) = tr(CDAB) = tr(BCDA) } passed\n"); 85 | } 86 | } else 87 | MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 88 | return pass; 89 | 90 | } 91 | 92 | 93 | #ifndef TEST_SUITE 94 | char* getCmdOption(char ** begin, 95 | char ** end, 96 | const std::string & option){ 97 | char ** itr = std::find(begin, end, option); 98 | if (itr != end && ++itr != end){ 99 | return *itr; 100 | } 101 | return 0; 102 | } 103 | 104 | 105 | int main(int argc, char ** argv){ 106 | int rank, np, n; 107 | int const in_num = argc; 108 | char ** input_str = argv; 109 | 110 | MPI_Init(&argc, &argv); 111 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 112 | MPI_Comm_size(MPI_COMM_WORLD, &np); 113 | 114 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 115 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 116 | if (n < 0) n = 7; 117 | } else n = 7; 118 | 119 | 120 | { 121 | World dw(MPI_COMM_WORLD, argc, argv); 122 | if (rank == 0){ 123 | printf("Checking trace calculation n = %d, p = %d:\n",n,np); 124 | } 125 | int pass = 
trace(n,dw); 126 | assert(pass); 127 | } 128 | 129 | MPI_Finalize(); 130 | return 0; 131 | } 132 | /** 133 | * @} 134 | * @} 135 | */ 136 | 137 | #endif 138 | -------------------------------------------------------------------------------- /examples/jacobi.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup examples 2 | * @{ 3 | * \defgroup jacobi jacobi 4 | * @{ 5 | * \brief Jacobi iterative method using gemv and spmv 6 | */ 7 | 8 | #include 9 | using namespace CTF; 10 | 11 | // compute a single Jacobi iteration to get new x, elementwise: x_i <== d_i*(b_i-sum_j R_ij*x_j) 12 | // solves Ax=b where R_ij=A_ij for i!=j, while R_ii=0, and d_i=1/A_ii 13 | void jacobi_iter(Matrix<> & R, Vector<> & b, Vector<> & d, Vector<> &x){ 14 | x["i"] = -R["ij"]*x["j"]; 15 | x["i"] += b["i"]; 16 | x["i"] *= d["i"]; 17 | } 18 | 19 | int jacobi(int n, 20 | World & dw){ 21 | 22 | Matrix<> spA(n, n, SP, dw, "spA"); 23 | Matrix<> dnA(n, n, dw, "dnA"); 24 | Vector<> b(n, dw); 25 | Vector<> c1(n, dw); 26 | Vector<> c2(n, dw); 27 | Vector<> res(n, dw); 28 | 29 | srand48(dw.rank); 30 | b.fill_random(0.0,1.0); 31 | c1.fill_random(0.0,1.0); 32 | c2["i"] = c1["i"]; 33 | 34 | //make diagonally dominant matrix 35 | dnA.fill_random(0.0,1.0); 36 | spA["ij"] += dnA["ij"]; 37 | //sparsify 38 | spA.sparsify(.5); 39 | spA["ii"] += 2.*n; 40 | dnA["ij"] = spA["ij"]; 41 | 42 | Vector<> d(n, dw); 43 | d["i"] = spA["ii"]; 44 | Transform<> inv([](double & d){ d=1./d; }); 45 | inv(d["i"]); 46 | 47 | Matrix<> spR(n, n, SP, dw, "spR"); 48 | Matrix<> dnR(n, n, dw, "dnR"); 49 | spR["ij"] = spA["ij"]; 50 | dnR["ij"] = dnA["ij"]; 51 | spR["ii"] = 0; 52 | dnR["ii"] = 0; 53 | 54 | /* spR.print(); 55 | dnR.print(); */ 56 | 57 | //do up to 100 iterations 58 | double res_norm; 59 | int iter; 60 | for (iter=0; iter<100; iter++){ 61 | jacobi_iter(dnR, b, d, c1); 62 | 63 | res["i"] = b["i"]; 64 | res["i"] -= dnA["ij"]*c1["j"]; 65 | 66 | res_norm = res.norm2(); 67 | if (res_norm < 1.E-4) break; 68 | } 69 | #ifndef TEST_SUITE 70 | if (dw.rank == 0) 71 | printf("Completed %d iterations of Jacobi with dense matrix, residual F-norm is %E\n", iter, res_norm); 72 | #endif 73 | 74 | for (iter=0; iter<100; iter++){ 75 | jacobi_iter(spR, b, d, c2); 76 | 77 | res["i"] = b["i"]; 78 | res["i"] -= spA["ij"]*c2["j"]; 79 | 80 | res_norm = res.norm2(); 81 | if (res_norm < 1.E-4) break; 82 | } 83 | #ifndef TEST_SUITE 84 | if (dw.rank == 0) 85 | printf("Completed %d iterations of Jacobi with sparse matrix, residual F-norm is %E\n", iter, res_norm); 86 | #endif 87 | 88 | c2["i"] -= c1["i"]; 89 | 90 | bool pass = c2.norm2() <= 1.E-6; 91 | 92 | if (dw.rank == 0){ 93 | if (pass) 94 | printf("{ Jacobi x[\"i\"] = (1./A[\"ii\"])*(b[\"j\"] - (A[\"ij\"]-A[\"ii\"])*x[\"j\"]) with sparse A } passed \n"); 95 | else 96 | printf("{ Jacobi x[\"i\"] = (1./A[\"ii\"])*(b[\"j\"] - (A[\"ij\"]-A[\"ii\"])*x[\"j\"]) with sparse A } failed \n"); 97 | } 98 | return pass; 99 | } 100 | 101 | 102 | #ifndef TEST_SUITE 103 | char* getCmdOption(char ** begin, 104 | char ** end, 105 | const std::string & option){ 106 | char ** itr = std::find(begin, end, option); 107 | if (itr != end && ++itr != end){ 108 | return *itr; 109 | } 110 | return 0; 111 | } 112 | 113 | 114 | int main(int argc, char ** argv){ 115 | int rank, np, n, pass; 116 | int const in_num = argc; 117 | char ** input_str = argv; 118 | 119 | MPI_Init(&argc, &argv); 120 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 121 | MPI_Comm_size(MPI_COMM_WORLD, &np); 122 | 123 | if 
(getCmdOption(input_str, input_str+in_num, "-n")){ 124 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 125 | if (n < 0) n = 7; 126 | } else n = 7; 127 | 128 | 129 | { 130 | World dw(argc, argv); 131 | 132 | if (rank == 0){ 133 | printf("Running Jacobi method on random %d-by-%d sparse matrix\n",n,n); 134 | } 135 | pass = jacobi(n, dw); 136 | assert(pass); 137 | } 138 | 139 | MPI_Finalize(); 140 | return 0; 141 | } 142 | /** 143 | * @} 144 | * @} 145 | */ 146 | 147 | #endif 148 | -------------------------------------------------------------------------------- /examples/sssp.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup examples 2 | * @{ 3 | * \defgroup sssp sssp 4 | * @{ 5 | * \brief single-source shortest-paths via the Bellman-Ford algorithm 6 | */ 7 | 8 | #include 9 | #include 10 | using namespace CTF; 11 | 12 | 13 | // return false if there are negative cycles, true otherwise 14 | template 15 | bool Bellman_Ford(Matrix A, Vector P, int n){ 16 | Vector Q(P); 17 | int r = 0; 18 | int new_tot_wht = P["ij"]; 19 | int tot_wht; 20 | do { 21 | if (r == n+1) return false; // exit if we did not converge in n iterations 22 | else r++; 23 | Q["i"] = P["i"]; // save old distances 24 | P["i"] += A["ij"]*P["j"]; // update distances 25 | tot_wht = new_tot_wht; 26 | new_tot_wht = P["ij"]; 27 | assert(new_tot_wht <= tot_wht); 28 | } while (new_tot_wht < tot_wht); // continue so long as some distance got shorter 29 | return true; 30 | } 31 | 32 | // calculate SSSP on a graph of n nodes distributed on World (communicator) dw 33 | int sssp(int n, 34 | World & dw){ 35 | 36 | //tropical semiring, define additive identity to be n*n (max weight) to prevent integer overflow 37 | Semiring s(n*n, 38 | [](int a, int b){ return std::min(a,b); }, 39 | MPI_MIN, 40 | 0, 41 | [](int a, int b){ return a+b; }); 42 | 43 | //random adjacency matrix 44 | Matrix A(n, n, dw, s); 45 | srand(dw.rank); 46 | A.fill_random(0, n*n); 47 | 48 | A["ii"] = n*n; 49 | 50 | A.sparsify([=](int a){ return a<5*n; }); 51 | 52 | Vector v(n, dw, s); 53 | if (dw.rank == 0){ 54 | int64_t idx = 0; 55 | int val = 0; 56 | v.write(1, &idx, &val); 57 | } else v.write(0, NULL, NULL); 58 | 59 | //make sure we converged 60 | int pass = Bellman_Ford(A, v, n); 61 | if (n>=3){ 62 | v["i"] = n*n; 63 | if (dw.rank == 0){ 64 | int64_t idx = 0; 65 | int val = 0; 66 | v.write(1, &idx, &val); 67 | } else v.write(0, NULL, NULL); 68 | 69 | 70 | // add a negative cycle to A 71 | if (dw.rank == 0){ 72 | int64_t idx[] = {1,n+2,2*n+0}; 73 | int val[] = {1, -1, -1}; 74 | A.write(3, idx, val); 75 | } else A.write(0, NULL, NULL); 76 | //make sure we did not converge 77 | int pass2 = Bellman_Ford(A, v, n); 78 | pass = pass & !pass2; 79 | } 80 | 81 | if (dw.rank == 0){ 82 | MPI_Reduce(MPI_IN_PLACE, &pass, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 83 | if (pass) 84 | printf("{ negative cycle check via Bellman-Ford } passed \n"); 85 | else 86 | printf("{ negative cycle check via Bellman-Ford } failed \n"); 87 | } else 88 | MPI_Reduce(&pass, MPI_IN_PLACE, 1, MPI_INT, MPI_MIN, 0, MPI_COMM_WORLD); 89 | return pass; 90 | } 91 | 92 | 93 | #ifndef TEST_SUITE 94 | char* getCmdOption(char ** begin, 95 | char ** end, 96 | const std::string & option){ 97 | char ** itr = std::find(begin, end, option); 98 | if (itr != end && ++itr != end){ 99 | return *itr; 100 | } 101 | return 0; 102 | } 103 | 104 | 105 | int main(int argc, char ** argv){ 106 | int rank, np, n, pass; 107 | int const in_num = argc; 108 | char ** 
input_str = argv; 109 | 110 | MPI_Init(&argc, &argv); 111 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 112 | MPI_Comm_size(MPI_COMM_WORLD, &np); 113 | 114 | if (getCmdOption(input_str, input_str+in_num, "-n")){ 115 | n = atoi(getCmdOption(input_str, input_str+in_num, "-n")); 116 | if (n < 0) n = 7; 117 | } else n = 7; 118 | 119 | { 120 | World dw(argc, argv); 121 | 122 | if (rank == 0){ 123 | printf("Computing SSSP on sparse graph with %d nodes using the Bellman-Ford algorithm\n",n); 124 | } 125 | pass = sssp(n, dw); 126 | assert(pass); 127 | } 128 | 129 | MPI_Finalize(); 130 | return 0; 131 | } 132 | /** 133 | * @} 134 | * @} 135 | */ 136 | 137 | #endif 138 | -------------------------------------------------------------------------------- /examples/scan.cxx: -------------------------------------------------------------------------------- 1 | /** \addtogroup examples 2 | * @{ 3 | * \defgroup scan scan 4 | * @{ 5 | * \brief scan iterative method using gemv and spmv 6 | */ 7 | 8 | #include 9 | #include 10 | using namespace CTF; 11 | 12 | template 13 | void rec_scan(Tensor & V){ 14 | 15 | if (V.order == 1){ 16 | Matrix W(2, V.lens[0], V.lens[0], *V.wrld, *V.sr); 17 | dtype mulid = ((dtype*)V.sr->mulid())[0]; 18 | W["ij"], [=](dtype & a){ a=mulid; }; 19 | int ssym[] = {SH, NS}; 20 | int nsym[] = {NS, NS}; 21 | Tensor W1(W, ssym); 22 | Tensor W2(W1, nsym); 23 | V["i"] = W2["ji"]*V["j"]; 24 | } else { 25 | Tensor V2(V.order-1, V.lens, *V.wrld, *V.sr); 26 | char str[V.order]; 27 | for (int i=0; i W(2, V.lens[V.order-1], V.lens[V.order-1], *V.wrld, *V.sr); 32 | dtype mulid = ((dtype*)V.sr->mulid())[0]; 33 | W["ij"], [=](dtype & a){ a=mulid; }; 34 | int hsym[] = {SH, NS}; 35 | int nsym[] = {NS, NS}; 36 | Tensor W1(W, hsym); 37 | Tensor W2(W1, nsym); 38 | char str2[V.order]; 39 | memcpy(str2+1, str+1, V.order-1); 40 | str2[0] = 'a'+V.order; 41 | char strW[2] = {str2[0],'a'}; 42 | V[str] = W2[strW]*V[str2]; 43 | V[str] += V2[str+1]; 44 | } 45 | } 46 | 47 | template 48 | void scan(Vector & v, int logn){ 49 | int64_t np; 50 | int64_t * inds; 51 | double * data; 52 | 53 | int lens[logn]; 54 | std::fill(lens, lens+logn, 2); 55 | 56 | // represent vector to scan as 2-by-...-by-2 tensor 57 | Tensor V(logn, lens, *v.wrld, *v.sr); 58 | 59 | v.get_local_data(&np, &inds, &data); 60 | V.write(np, inds, data); 61 | 62 | free(inds); 63 | delete [] data; 64 | 65 | rec_scan(V); 66 | 67 | // put the data from the tensor back into the vector 68 | V.get_local_data(&np, &inds, &data); 69 | v.write(np, inds, data); 70 | 71 | free(inds); 72 | delete [] data; 73 | } 74 | 75 | int scan_test(int logn, 76 | World & dw){ 77 | 78 | Vector<> v(1<= 1.E-9*(1< 4 | 5 | using namespace CTF; 6 | 7 | int fast_diagram(int const n, 8 | World &ctf){ 9 | int rank, i, num_pes; 10 | 11 | MPI_Comm_rank(MPI_COMM_WORLD, &rank); 12 | MPI_Comm_size(MPI_COMM_WORLD, &num_pes); 13 | 14 | int len3[] = {n,n,n}; 15 | int len4[] = {n,n,n,n}; 16 | //int len5[] = {n,n,n,n,n}; 17 | int NNN[] = {NS,NS,NS}; 18 | int NNNN[] = {NS,NS,NS,NS}; 19 | int ANNN[] = {AS,NS,NS,NS}; 20 | int SNNN[] = {SH,NS,NS,NS}; 21 | //int AANNN[] = {AS,AS,NS,NS,NS}; 22 | 23 | Tensor<> T(4, len4, SNNN, ctf); 24 | Tensor<> V(4, len4, SNNN, ctf); 25 | 26 | Tensor<> W(4, len4, SNNN, ctf); 27 | Tensor<> W_ans(4, len4, SNNN, ctf); 28 | 29 | Tensor<> Z_AS(4, len4, ANNN, ctf); 30 | Tensor<> Z_SH(4, len4, SNNN, ctf); 31 | Tensor<> Z_NS(4, len4, NNNN, ctf); 32 | Tensor<> Z_D(3, len3, NNN, ctf); 33 | 34 | 35 | Tensor<> Ts(3, len3, NNN, ctf); 36 | Tensor<> Zs(3, len3, NNN, ctf); 37 | 38 | { 39 | 
int64_t * indices; 40 | double * values; 41 | int64_t size; 42 | srand48(173*rank); 43 | 44 | T.read_local(&size, &indices, &values); 45 | for (i=0; i