├── .static └── custom.css ├── .templates └── layout.html ├── 00-introduction ├── README.rst ├── blackscholes_solution.png └── perlin_noise_solution.png ├── 01-examples ├── INTRO.rst ├── README.rst ├── array-sum-fortran │ ├── .config │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ └── array_sum.f90 ├── cholesky │ ├── .config │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ ├── cholesky.c │ └── cholesky.h ├── stream-barr │ ├── .config │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ └── stream-barr.c └── stream-deps │ ├── .config │ ├── multirun.sh │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ └── stream-deps.c ├── 02-beginners ├── README.rst ├── dot-product │ ├── .config │ │ ├── dot-product.c │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ └── dot-product.c ├── matmul │ ├── .config │ │ ├── matmul.c │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ └── matmul.c └── multisort │ ├── .config │ ├── multirun.sh │ ├── multisort.c │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ └── multisort.c ├── 03-gpu-devices ├── INTRO.rst ├── README.rst ├── cholesky-cuda │ ├── .config │ │ ├── cholesky_hyb.c │ │ ├── cuda_potrf.h │ │ ├── multirun.sh │ │ ├── run-once.sh │ │ ├── run.sh │ │ └── run_trace.sh │ ├── Makefile │ ├── README.rst │ ├── cholesky_hyb.c │ ├── cuda_potrf.cu │ └── cuda_potrf.h ├── krist-cuda │ ├── .config │ │ ├── krist.h │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ ├── clocks.c │ ├── kernel.cu │ ├── krist.c │ └── krist.h ├── krist-opencl │ ├── .config │ │ ├── krist.h │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ ├── clocks.c │ ├── kernel.cl │ ├── krist.c │ └── krist.h ├── matmul-cuda │ ├── .config │ │ ├── kernel.h │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ ├── cclock.c │ ├── check.c │ ├── driver.c │ ├── driver.h │ ├── gendat.c │ ├── kernel.cu │ ├── 
kernel.h │ ├── matmul.c │ └── prtspeed.c ├── matmul-opencl │ ├── .config │ │ ├── kernel.h │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ ├── cclock.c │ ├── check.c │ ├── driver.c │ ├── driver.h │ ├── gendat.c │ ├── kernel.cl │ ├── kernel.h │ ├── matmul.c │ └── prtspeed.c ├── nbody-cuda │ ├── .config │ │ ├── kernel.h │ │ ├── multirun.sh │ │ ├── nbody.h │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ ├── kernel.cu │ ├── kernel.h │ ├── nbody.c │ ├── nbody.h │ ├── nbody_input-16384.in │ └── nbody_out-ref.xyz ├── nbody-opencl │ ├── .config │ │ ├── kernel.h │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ ├── kernel.c │ ├── kernel.cl │ ├── kernel.h │ ├── nbody.c │ ├── nbody.h │ ├── nbody_input-16384.in │ └── nbody_out-ref.xyz ├── saxpy-cuda │ ├── .config │ │ ├── kernel.h │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ ├── kernel.cu │ ├── kernel.h │ └── saxpy.c └── saxpy-opencl │ ├── .config │ ├── kernel.h │ ├── multirun.sh │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ ├── kernel.cl │ ├── kernel.h │ └── saxpy.c ├── 04-mpi+ompss ├── README.rst ├── heat │ ├── .config │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ ├── heat-mpi-ompss.c │ ├── heat.h │ ├── misc.c │ ├── solver-mpi-ompss.c │ └── test.dat └── matmul │ ├── .config │ ├── mm-image.png │ ├── multirun.sh │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ ├── bsize.h │ ├── cclock.c │ ├── check.c │ ├── driver.c │ ├── gendat.c │ ├── layouts.c │ ├── layouts.h │ ├── matmul.c │ ├── matmul.h │ ├── prthead.c │ ├── prtspeed.c │ └── test.in ├── 05-ompss+dlb ├── README.rst ├── lub │ ├── .config │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── LUB.c │ ├── Makefile │ └── README.rst ├── lulesh │ ├── .config │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README │ ├── README.rst │ ├── lulesh-comm.cc │ ├── lulesh-init.cc │ ├── lulesh-util.cc │ ├── lulesh-viz.cc │ ├── lulesh.cc │ ├── lulesh.h │ └── 
lulesh_tuple.h ├── pils-multiapp │ ├── .config │ │ ├── multirun.sh │ │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ ├── extrae-multiapp.xml │ ├── input1 │ ├── input2 │ ├── ompss_pils.c │ └── trace-multiapp.sh └── pils │ ├── .config │ ├── multirun.sh │ └── run-once.sh │ ├── Makefile │ ├── README.rst │ └── mpi_ompss_pils.c ├── README.rst ├── common-files ├── Makefile ├── configure_VirtualBox ├── configure_default ├── configure_mn4 ├── configure_nord3 ├── configure_nvidia ├── extrae.xml ├── graph.sh ├── multirun.sh ├── paraver ├── run-once.sh ├── sched-job-mpi_mn4 ├── sched-job-mpi_nord3 ├── sched-job-mpi_nvidia ├── sched-job-smp_mn4 ├── sched-job-smp_nord3 ├── sched-job-smp_nvidia ├── trace-mpi.sh └── trace.sh ├── configure.sh └── paraver-cfgs ├── cluster └── network_transfers_and_bw.cfg ├── general ├── 2dp_WhereMyNextTaskWasGenerated.cfg ├── analysis │ ├── 2dh_L1Dmr.cfg │ ├── 2dh_ipc.cfg │ ├── 2dh_ui.cfg │ ├── 2dh_useful_MIPS.cfg │ ├── 2dh_useful_instr.cfg │ ├── 2dh_usefultime.cfg │ ├── 2dp_uf.cfg │ ├── 2dp_useful.cfg │ ├── 3dh_duration_uf.cfg │ ├── 3dh_instr_uf.cfg │ ├── 3dh_ipc_uf.cfg │ ├── Load_balance.cfg │ ├── Sup_model_data.cfg │ ├── advanced │ │ ├── 2dc_cyc_ipc.cfg │ │ ├── 2dc_ud_ipc.cfg │ │ ├── 2dc_ufduration_IPC.cfg │ │ ├── 2dp_percentMPI_in_uf.cfg │ │ ├── 2dp_ufExcludingMPI.cfg │ │ ├── 3dc_instr_ipc_useful.cfg │ │ ├── 3dc_usefulInstr_cycles_ipc.cfg │ │ ├── 3dh_ud_u.cfg │ │ ├── 3dh_usefulIPC_uf.cfg │ │ ├── 3dp_ipc_useful.cfg │ │ ├── Sup_model_data.2.cfg │ │ ├── avg_procs.cfg │ │ └── load_balance_for_specific_uf.cfg │ └── efficiency.cfg ├── link_to_source │ └── by_call_stack │ │ ├── 2dp_MPIcallertime.cfg │ │ ├── 2dp_MPItime_line.cfg │ │ ├── 2dp_code_after_line.cfg │ │ ├── 2dp_line_call.cfg │ │ ├── 3dh_duration_MPIcallline.cfg │ │ ├── 3dp_MPItime_line.cfg │ │ ├── MPI_caller.cfg │ │ ├── MPI_caller_line.cfg │ │ ├── MPI_callers_4_levels.cfg │ │ └── nbcalls_toMPI_per_uf.cfg ├── sanity_checks │ ├── 2dh_cycperus.cfg │ ├── Equivalent_CPUs.cfg │ ├── 
Events_too_close.cfg │ ├── flushing.cfg │ ├── preempted_time_in_useful.cfg │ └── preempted_time_outside_useful.cfg └── views │ ├── User_function_excl_MPI.cfg │ ├── instantaneous_parallelism.cfg │ ├── not_running_duration.cfg │ ├── not_useful.cfg │ ├── one.cfg │ ├── state_as_is.cfg │ ├── state_duration.cfg │ ├── useful.cfg │ ├── useful_duration.cfg │ ├── user_calls.cfg │ ├── user_calls_duration.cfg │ ├── user_function_nesting_level.cfg │ ├── user_functions.cfg │ └── user_functions_duration.cfg ├── hwc ├── active_set.cfg └── papi │ ├── architecture │ ├── 2dh_L1D_total_misses.cfg │ ├── 2dh_L2_total_misses.cfg │ ├── 2dh_TLB_total_misses.cfg │ ├── 2dh_preemption_time.cfg │ ├── 3dh_IPC_state.cfg │ ├── 3dh_percentpreempted_useful.cfg │ ├── 3dh_preempted_useful.cfg │ ├── BytesPerFlop.cfg │ ├── BytesPerInstr.cfg │ ├── JS21_relative_preempted_time.cfg │ ├── L1D_misses.cfg │ ├── L1D_missratio.cfg │ ├── L1_2_L2_miss_ratio.cfg │ ├── L1_Load_misses.cfg │ ├── L1_Load_missratio.cfg │ ├── L1_store_misses.cfg │ ├── L2D_Total_miss_ratio.cfg │ ├── L2D_miss_ratio.cfg │ ├── L2D_miss_ratio_v2.cfg │ ├── L2D_misses.cfg │ ├── L2_2_TLB_miss_ratio.cfg │ ├── L3D_miss_ratio.cfg │ ├── L3D_misses.cfg │ ├── L3_misses.cfg │ ├── Preempted_time.cfg │ ├── Relative_preempted_time.cfg │ ├── TLB_misses.cfg │ ├── TLB_missratio.cfg │ ├── loaded_bytes.cfg │ ├── relative_preemption_time.cfg │ ├── useful_loadad_bytes.cfg │ └── useful_loaded_bytes.cfg │ ├── models │ └── 3D_duration.cfg │ ├── mx_counters │ ├── nb_medium_msgs_sent.cfg │ ├── nb_rndv_msgs_sent.cfg │ ├── nb_small_msgs_sent.cfg │ └── route_dispersion.cfg │ ├── performance │ ├── 2dh_ipc_frequency.cfg │ ├── 3dh_cycles_per_us.cfg │ ├── CPI.cfg │ ├── IPC.cfg │ ├── L2Dmisses_rate.cfg │ ├── MFLOPS.cfg │ ├── MFMAS.cfg │ ├── MIPS.cfg │ ├── MLoadS.cfg │ ├── MemBW_pernode.cfg │ ├── MemBW_perprocess.cfg │ ├── NoIssue_cycles_per_us.cfg │ ├── cycles_per_us.cfg │ ├── cycles_per_us_decentInterval.cfg │ ├── useful_MIPS.cfg │ └── useful_cycus.cfg │ └── program │ ├── 
3dh_instr_state.cfg │ ├── Computation_intensity.cfg │ ├── Load2store_ratio.cfg │ ├── Load_stores.cfg │ ├── Loads.cfg │ ├── Loads_to_FMA_ratio.cfg │ ├── Stores.cfg │ ├── branch_mix.cfg │ ├── flops.cfg │ ├── instructions.cfg │ └── useful_instructions.cfg ├── mpi ├── analysis │ ├── 2dc_connectivity_bw.cfg │ ├── 2dh_bytes_sent.cfg │ ├── 2dp_MPI_activity.cfg │ ├── 2dp_connectivity.cfg │ ├── 2dp_mpi_stats.cfg │ ├── 3dc_connectivity_caller.cfg │ ├── 3dh_duration_MPI_activity.cfg │ ├── 3dh_duration_per_call.cfg │ ├── 3dh_size_call.cfg │ ├── advanced │ │ ├── 2dc_bytessent_totbytessent.cfg │ │ ├── 2dc_connectivity_snd_bytes.cfg │ │ ├── 2dc_e2ebw_bytes.cfg │ │ ├── 2dh_comm_phase_duration.cfg │ │ └── 2dp_totbytessent.cfg │ ├── avg_netbw.cfg │ ├── collectives │ │ ├── 3dh_duration_collective.cfg │ │ ├── 3dh_recvsize_collectivecall.cfg │ │ └── 3dh_sendsize_collectivecall.cfg │ ├── other │ │ ├── 3dc_p2p_size_bw_per_call.cfg │ │ ├── 3dc_size_bw_per_call.cfg │ │ ├── 3dh_bw_per_call.cfg │ │ ├── 3dh_duration_per_call.cfg │ │ ├── Collective_LateArrivers.cfg │ │ ├── Collective_stats.cfg │ │ ├── Collectives_balance.cfg │ │ ├── CommComp_overlap.cfg │ │ ├── Correlation_duration_size.cfg │ │ ├── CostOf_p2pCalls.cfg │ │ ├── MPIxroutine.cfg │ │ ├── Specific_collective_analysis.cfg │ │ ├── System_BW.cfg │ │ ├── call_duration_histogram.cfg │ │ └── communication_matrix.cfg │ └── point2point │ │ ├── 2d_comm_pattern.cfg │ │ ├── 2d_comm_pattern_rcv.cfg │ │ ├── 2d_comm_pattern_snd.cfg │ │ ├── 2d_comm_pattern_snd_order.cfg │ │ ├── 2d_costofreceives_per_source.cfg │ │ ├── 2d_who_comms.cfg │ │ ├── 2d_who_latesends_to_whom.cfg │ │ ├── 2dh_p2p_phase_duration.cfg │ │ ├── 2dh_send_size.cfg │ │ ├── 2dp_high_bw_process.cfg │ │ ├── 3dc_msgsize_totbytes.cfg │ │ ├── 3dc_srbw_bytes.cfg │ │ ├── 3dh_bw_per_call.cfg │ │ ├── 3dh_cost_per_call.cfg │ │ ├── 3dh_msgsize_per_pt2pt_call.cfg │ │ ├── 3dh_srbw_per_call.cfg │ │ ├── IProbe_density.cfg │ │ └── system_bw.cfg ├── sanity_checks │ ├── 2d_compute_shifts.cfg │ ├── 
2dt_backwards_time.cfg │ ├── 2dt_brwds_nbbwrds.cfg │ ├── backward_msgs.cfg │ ├── cloged_system.cfg │ ├── duration_backwards_msg.cfg │ └── src_backwards_msg.cfg ├── scalasca_properties │ ├── late_receivers.cfg │ ├── received_from_delayed.cfg │ ├── receives_from_late_sender.cfg │ └── receiving_from_latesender.cfg └── views │ ├── Activity_duration.cfg │ ├── Enumeration_of_MPI_calls.cfg │ ├── InMPI_mem_BW.cfg │ ├── In_MPI_call.cfg │ ├── MPI_activity.cfg │ ├── MPI_bandwidth.cfg │ ├── MPI_call.cfg │ ├── MPI_call_density.cfg │ ├── MPI_call_duration.cfg │ ├── Outside_MPI.cfg │ ├── advanced │ ├── 2d_who_comms.cfg │ ├── Failed_iprobes.cfg │ ├── Failed_tests.cfg │ ├── From_where_mpi_calls.cfg │ ├── Isend_waits.cfg │ ├── MPI_Wait_from_Isend.cfg │ ├── MPI_collectives.cfg │ ├── MPI_p2p.cfg │ ├── MPIcall_cost.cfg │ ├── MPIcall_cost_perbyte.cfg │ ├── MPIcall_duration.cfg │ ├── Who_calls_mpi.cfg │ ├── bytes_arriving.cfg │ ├── bytes_outgoing.cfg │ ├── bytes_sr_within_call.cfg │ ├── bytesperMBS.cfg │ ├── bytespermsg.cfg │ ├── in_MPI_call.cfg │ ├── in_specific_MPI_call.cfg │ ├── long_MPI_calls.cfg │ ├── messages_arriving.cfg │ ├── messages_outgoing.cfg │ ├── p2p_phase_duration.cfg │ ├── receive_bandwidth.cfg │ ├── receive_bandwidth_appl.cfg │ ├── receive_bandwidth_task.cfg │ ├── send_bandwidth.cfg │ ├── send_bandwidth_appl.cfg │ ├── send_bandwidth_task.cfg │ ├── specific_MPI_duration.cfg │ ├── sr_msgs.cfg │ ├── total_bytes_in_transit.cfg │ ├── total_sr_bw.cfg │ ├── total_sr_msgs.cfg │ ├── total_system_bw.cfg │ └── typeof_MPI_Wait.cfg │ ├── collectives │ ├── MPI_collective_call.cfg │ ├── advanced │ │ ├── Broadcast_number.cfg │ │ └── all2all_number.cfg │ ├── collective_duration.cfg │ ├── collective_root.cfg │ ├── collective_sizes.cfg │ ├── communicator.cfg │ ├── enumerate_collectives.cfg │ ├── nbprocs_in_colective.cfg │ └── outside_collective.cfg │ ├── comm_size.cfg │ ├── communication_phase.cfg │ ├── communication_phase_duration.cfg │ ├── in_MPI_activity.cfg │ ├── msg_sizes.cfg │ ├── 
nb_active_processes.cfg │ ├── nb_in_MPI.cfg │ ├── node_bandwidth.cfg │ └── point2point │ ├── In_MPI_pt2pt_call.cfg │ ├── In_MPI_reception_call.cfg │ ├── In_MPI_send_pt2pt_call.cfg │ ├── MPICall_overhead.cfg │ ├── MPI_p2p_call.cfg │ ├── advanced │ ├── In_long_receptions.cfg │ ├── bytes_received_at_waits.cfg │ ├── destination_last_large_send.cfg │ ├── exclusively_1_direction_transfers.cfg │ ├── high_bw_process.cfg │ ├── high_s_r_bandwidth.cfg │ ├── max_recBW_during_activity.cfg │ ├── physical_s_r_bandwidth.cfg │ ├── physical_s_r_msgs.cfg │ ├── receiving_not_sending.cfg │ └── sending_not_receiving.cfg │ ├── destination_of_send.cfg │ ├── iprobe_misses.cfg │ ├── iprobe_misses_per_ms.cfg │ ├── models │ ├── excess_time.cfg │ └── linear_model.cfg │ ├── nb_collective.cfg │ ├── nbprocs_in_pt2pt.cfg │ ├── outstanding_sends.cfg │ ├── p2p_bytes_received.cfg │ ├── p2p_bytes_sent.cfg │ ├── p2p_duration.cfg │ ├── p2p_recv_size.cfg │ ├── p2p_send_size.cfg │ ├── p2p_size.cfg │ ├── s_r_bandwidth.cfg │ ├── s_r_bytes.cfg │ ├── s_r_msgs.cfg │ ├── source_of_reception.cfg │ ├── to_whom_I_send.cfg │ ├── total_bw.cfg │ ├── total_bytes_btw_events.cfg │ ├── total_msgs_in_transit.cfg │ └── wait_type.cfg └── ompss ├── 2d_general.cfg ├── cuda ├── 3dh_duration_CUDAruntime.cfg ├── CUDA_runtime.cfg ├── Non_overlapped_CUDA_Transfer_Direction.cfg └── cuda_transfers.cfg ├── data_mgmgt ├── 2dh_bw2device.cfg ├── aggregated_bandwidth.cfg ├── bandwidth_per_device.cfg ├── bytes_being_transfered.cfg ├── cache_waiting_for.cfg ├── data_tx.cfg ├── direction_of_data_transfer_nosesiescorrecta.cfg ├── malloc_free_in_device.cfg └── nb_ongoing_transfers_duda.cfg ├── general.cfg ├── graph_and_scheduling ├── 2dp_order.cfg ├── creating_submitting_task.cfg ├── nb_concurrent_ready.cfg ├── nb_ready_tasks.cfg ├── nb_tasks_in_graph.cfg └── versioning_sched.cfg ├── opencl └── opencl_runtime.cfg ├── runtime ├── 2dp_thread_state.cfg ├── 3dh_duration_state.cfg ├── async_thread_state.cfg ├── average_sleep_time.cfg ├── 
nanos_API.cfg ├── nanos_locks.cfg ├── num_threads.cfg ├── spins_yields.cfg ├── thread_cpuid.cfg ├── thread_numa_node.cfg ├── thread_state.cfg ├── thread_state_with_locks.cfg └── waiting_task.cfg ├── tasks ├── 2dp_tasks.cfg ├── 3dh_L2Tmr_task.cfg ├── 3dh_L2mr_task.cfg ├── 3dh_duration_task.cfg ├── 3dh_instr_task.cfg ├── 3dh_ipc_task.cfg ├── in_task.cfg ├── task_name_and_location.cfg ├── task_numa_node.cfg ├── task_number.cfg └── task_priority.cfg └── worksharing_loops.cfg /.static/custom.css: -------------------------------------------------------------------------------- 1 | @import url('https://fonts.googleapis.com/css?family=Roboto+Condensed:400,700'); 2 | @import url('https://fonts.googleapis.com/css?family=Roboto+Mono:400,700'); 3 | 4 | span.option { 5 | font-family: "Roboto Mono", monospace 6 | } 7 | 8 | body:before { 9 | content: "Programming Models @ BSC"; 10 | background: #6fa5cc; 11 | width: 100%; 12 | display: block; 13 | text-align: center; 14 | height: auto; 15 | overflow: hidden; 16 | color: #000000; 17 | font-size: 48px; 18 | font-style: normal; 19 | font-weight: 600; 20 | line-height: 54px; 21 | padding: 3rem 0 3rem 0; 22 | } 23 | 24 | @media screen and (max-width: 875px) { 25 | body:before { 26 | margin: -20px -30px 20px -30px; 27 | width: calc(100% + 60px); 28 | } 29 | } 30 | 31 | .document { 32 | width: 1040px !important; 33 | } 34 | 35 | @media screen and (max-width: 875px) { 36 | .document { 37 | width: 100% !important; 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /.templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% block rootrellink %} 3 |
  • BSC Programming Models »
  • 4 | {{ super() }} 5 | {% endblock %} 6 | 7 | -------------------------------------------------------------------------------- /00-introduction/blackscholes_solution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bsc-pm/ompss-ee/edc3f13c8137bae7ca691b15fbd18b70d0765311/00-introduction/blackscholes_solution.png -------------------------------------------------------------------------------- /00-introduction/perlin_noise_solution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bsc-pm/ompss-ee/edc3f13c8137bae7ca691b15fbd18b70d0765311/00-introduction/perlin_noise_solution.png -------------------------------------------------------------------------------- /01-examples/INTRO.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | In this section we include several OmpSs applications that are already parallelized (i.e. annotated 5 | with OmpSs directives). Users have not to change the code, but they are encouraged to experiment 6 | with them. You can also use that source directory to experiment with the different compiler 7 | and runtime options, as well as the different instrumentation plugins provided with your OmpSs 8 | installation. 9 | 10 | -------------------------------------------------------------------------------- /01-examples/README.rst: -------------------------------------------------------------------------------- 1 | Examples Using OmpSs 2 | ******************** 3 | 4 | .. 
toctree:: 5 | :maxdepth: 2 6 | :numbered: 7 | 8 | INTRO.rst 9 | cholesky/README.rst 10 | stream-barr/README.rst 11 | array-sum-fortran/README.rst 12 | -------------------------------------------------------------------------------- /01-examples/array-sum-fortran/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=array_sum-p 2 | 3 | export IFS=";" 4 | 5 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12" 6 | NSIZES="8388608" 7 | BSIZES="32768" 8 | 9 | for N in $NSIZES; do 10 | for BS in $BSIZES; do 11 | for thread in $THREADS; do 12 | NX_SMP_WORKERS=$thread ./$PROGRAM $N $BS 13 | done 14 | done 15 | done 16 | -------------------------------------------------------------------------------- /01-examples/array-sum-fortran/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=array_sum-p 2 | N=8388608 3 | BS=32768 4 | 5 | ./$PROGRAM $N $BS 6 | -------------------------------------------------------------------------------- /01-examples/array-sum-fortran/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=array_sum 2 | 3 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 4 | 5 | JOB_SCHED_VERSION=-smp 6 | 7 | BASE_DIR=../.. 
8 | include $(BASE_DIR)/common-files/Makefile 9 | 10 | FC = mfc 11 | 12 | FFLAGS = --ompss --no-copy-deps 13 | FFLAGS_P = 14 | FFLAGS_I = --instrument 15 | FFLAGS_D = --debug 16 | 17 | LIBS = 18 | INCS = 19 | 20 | EXTRA = -O3 21 | 22 | $(PROGRAM)-p: $(PROGRAM).f90 23 | $(FC) $(GFLAGS) $(FFLAGS) $(FFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 24 | 25 | $(PROGRAM)-i: $(PROGRAM).f90 26 | $(FC) $(GFLAGS) $(FFLAGS) $(FFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 27 | 28 | $(PROGRAM)-d: $(PROGRAM).f90 29 | $(FC) $(GFLAGS) $(FFLAGS) $(FFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 30 | 31 | clean: 32 | rm -f $(FC)_* *.o *~ $(TARGETS) 33 | 34 | -------------------------------------------------------------------------------- /01-examples/cholesky/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | 2 | PROGRAM=cholesky-p 3 | 4 | export IFS=";" 5 | 6 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12" 7 | MSIZES="2048" 8 | BSIZES="256" 9 | 10 | for MS in $MSIZES; do 11 | for BS in $BSIZES; do 12 | for thread in $THREADS; do 13 | NX_SMP_WORKERS=$thread ./$PROGRAM $MS $BS 0 14 | done 15 | done 16 | done 17 | -------------------------------------------------------------------------------- /01-examples/cholesky/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | 2 | PROGRAM=cholesky-p 3 | 4 | export NX_SMP_WORKERS=4 5 | 6 | ./$PROGRAM 4096 512 1 7 | 8 | -------------------------------------------------------------------------------- /01-examples/cholesky/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=cholesky 2 | 3 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 4 | 5 | JOB_SCHED_VERSION=-smp 6 | 7 | BASE_DIR=../.. 
8 | include $(BASE_DIR)/common-files/Makefile 9 | 10 | CC = mcc 11 | 12 | CFLAGS = --ompss 13 | CFLAGS_P = 14 | CFLAGS_I = --instrument 15 | CFLAGS_D = --debug 16 | 17 | LIBS = --Wl,-L$(MKL_LIB_DIR) -lmkl_sequential -lmkl_core -lmkl_rt -lpthread 18 | INCS = -I$(MKL_INC_DIR) 19 | 20 | EXTRA = -std=c99 -O3 -Wall -Wno-unused 21 | 22 | $(PROGRAM)-p: $(PROGRAM).c $(MKL_CHECK) 23 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 24 | 25 | $(PROGRAM)-i: $(PROGRAM).c $(MKL_CHECK) 26 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 27 | 28 | $(PROGRAM)-d: $(PROGRAM).c $(MKL_CHECK) 29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 30 | 31 | clean: 32 | rm -f $(CC)_* *.o *~ $(TARGETS) 33 | 34 | -------------------------------------------------------------------------------- /01-examples/stream-barr/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=stream-barr-p 2 | 3 | export IFS=";" 4 | 5 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12" 6 | 7 | for thread in $THREADS; do 8 | NX_GPUS=0 NX_SMP_WORKERS=$thread ./$PROGRAM 9 | done 10 | -------------------------------------------------------------------------------- /01-examples/stream-barr/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=stream-barr-p 2 | 3 | export NX_SMP_WORKERS=1 4 | 5 | ./$PROGRAM 6 | 7 | -------------------------------------------------------------------------------- /01-examples/stream-barr/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=stream-barr 2 | 3 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 4 | 5 | JOB_SCHED_VERSION=-smp 6 | 7 | BASE_DIR=../.. 
8 | include $(BASE_DIR)/common-files/Makefile 9 | 10 | CC = mcc 11 | 12 | CFLAGS = --ompss 13 | CFLAGS_P = 14 | CFLAGS_I = --instrument 15 | CFLAGS_D = --debug 16 | 17 | LIBS = 18 | INCS = 19 | 20 | EXTRA = -std=c99 -O3 -Wall -Wno-unused -mcmodel=large 21 | 22 | $(PROGRAM)-p: $(PROGRAM).c 23 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 24 | 25 | $(PROGRAM)-i: $(PROGRAM).c 26 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 27 | 28 | $(PROGRAM)-d: $(PROGRAM).c 29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 30 | 31 | clean: 32 | rm -f $(CC)_* *.o *~ $(TARGETS) 33 | 34 | -------------------------------------------------------------------------------- /01-examples/stream-barr/README.rst: -------------------------------------------------------------------------------- 1 | Stream Benchmark 2 | ---------------- 3 | 4 | The stream benchmark is part of the HPC Challenge benchmarks (http://icl.cs.utk.edu/hpcc/) and here 5 | we present two versions: one that inserts barriers and another without barriers. The behavior of 6 | version with barriers resembles the OpenMP version, where the different functions (Copy, Scale, ...) 7 | are executed one after another for the whole array while in the version without barriers, functions 8 | that operate on one part of the array are interleaved and the OmpSs runtime keeps the correctness 9 | by means of the detection of data-dependences. 10 | 11 | .. note:: 12 | You can dowload this code visiting the url http://pm.bsc.es *OmpSs Examples and Exercises*'s 13 | (code) link. The Stream benchmark is included inside the *01-examples*'s directory. 
14 | -------------------------------------------------------------------------------- /01-examples/stream-deps/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=stream-deps-p 2 | 3 | export IFS=";" 4 | 5 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12" 6 | 7 | for thread in $THREADS; do 8 | NX_GPUS=0 NX_SMP_WORKERS=$thread ./$PROGRAM 9 | done 10 | -------------------------------------------------------------------------------- /01-examples/stream-deps/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=stream-deps-p 2 | 3 | export NX_SMP_WORKERS=1 4 | 5 | ./$PROGRAM 6 | -------------------------------------------------------------------------------- /01-examples/stream-deps/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=stream-deps 2 | 3 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 4 | 5 | JOB_SCHED_VERSION=-smp 6 | 7 | BASE_DIR=../.. 
8 | include $(BASE_DIR)/common-files/Makefile 9 | 10 | CC = mcc 11 | 12 | CFLAGS = --ompss 13 | CFLAGS_P = 14 | CFLAGS_I = --instrument 15 | CFLAGS_D = --debug 16 | 17 | LIBS = 18 | INCS = 19 | 20 | EXTRA = -std=c99 -O3 -Wall -Wno-unused -mcmodel=large 21 | 22 | $(PROGRAM)-p: $(PROGRAM).c 23 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 24 | 25 | $(PROGRAM)-i: $(PROGRAM).c 26 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 27 | 28 | $(PROGRAM)-d: $(PROGRAM).c 29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 30 | 31 | clean: 32 | rm -f $(CC)_* *.o *~ $(TARGETS) 33 | 34 | -------------------------------------------------------------------------------- /01-examples/stream-deps/README.rst: -------------------------------------------------------------------------------- 1 | ../stream-barr/README.rst -------------------------------------------------------------------------------- /02-beginners/README.rst: -------------------------------------------------------------------------------- 1 | Beginners Exercises 2 | ******************* 3 | 4 | .. 
toctree:: 5 | :maxdepth: 2 6 | :numbered: 7 | 8 | matmul/README.rst 9 | dot-product/README.rst 10 | multisort/README.rst 11 | -------------------------------------------------------------------------------- /02-beginners/dot-product/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=dot-product-p 2 | 3 | export IFS=";" 4 | 5 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12" 6 | MSIZE="8192" 7 | BSIZE="128" 8 | 9 | for MS in $MSIZE; do 10 | for BS in $BSIZE; do 11 | for thread in $THREADS; do 12 | NX_GPUS=0 NX_SMP_WORKERS=$thread ./$PROGRAM $MS $BS 13 | done 14 | done 15 | done 16 | -------------------------------------------------------------------------------- /02-beginners/dot-product/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=dot-product-p 2 | 3 | export NX_SMP_WORKERS=4 4 | 5 | ./$PROGRAM 8192 128 6 | 7 | -------------------------------------------------------------------------------- /02-beginners/dot-product/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=dot-product 2 | PREFIX=. 3 | 4 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 5 | 6 | JOB_SCHED_VERSION=-smp 7 | 8 | BASE_DIR=../.. 
9 | include $(BASE_DIR)/common-files/Makefile 10 | 11 | CC = mcc 12 | 13 | CFLAGS = --ompss --no-copy-deps 14 | CFLAGS_P = 15 | CFLAGS_I = --instrument 16 | CFLAGS_D = --debug 17 | 18 | LIBS = 19 | INCS = 20 | 21 | EXTRA = -std=c99 -O3 -Wall -Wno-unused 22 | 23 | $(PROGRAM)-p: $(PREFIX)/$(PROGRAM).c 24 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 25 | 26 | $(PROGRAM)-i: $(PREFIX)/$(PROGRAM).c 27 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 28 | 29 | $(PROGRAM)-d: $(PREFIX)/$(PROGRAM).c 30 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 31 | 32 | clean: 33 | rm -f $(CC)_* *.o *~ $(TARGETS) 34 | 35 | -------------------------------------------------------------------------------- /02-beginners/matmul/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=matmul-p 2 | 3 | export IFS=";" 4 | 5 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12" 6 | SIZES="16" 7 | 8 | for size in $SIZES; do 9 | for thread in $THREADS; do 10 | NX_GPUS=0 NX_SMP_WORKERS=$thread ./$PROGRAM $size 11 | done 12 | done 13 | -------------------------------------------------------------------------------- /02-beginners/matmul/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=matmul-p 2 | 3 | export NX_SMP_WORKERS=4 4 | 5 | ./$PROGRAM 16 6 | 7 | -------------------------------------------------------------------------------- /02-beginners/matmul/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=matmul 2 | PREFIX=. 3 | 4 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 5 | 6 | JOB_SCHED_VERSION=-smp 7 | 8 | BASE_DIR=../.. 
9 | include $(BASE_DIR)/common-files/Makefile 10 | 11 | CC = mcc 12 | 13 | CFLAGS = --ompss 14 | CFLAGS_P = 15 | CFLAGS_I = --instrument 16 | CFLAGS_D = --debug 17 | 18 | LIBS = 19 | INCS = 20 | 21 | EXTRA = -std=c99 -O3 -Wall -Wno-unused 22 | 23 | RM = rm -f 24 | 25 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 26 | 27 | all: $(TARGETS) 28 | 29 | $(PROGRAM)-p: $(PREFIX)/$(PROGRAM).c 30 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 31 | 32 | $(PROGRAM)-i: $(PREFIX)/$(PROGRAM).c 33 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 34 | 35 | $(PROGRAM)-d: $(PREFIX)/$(PROGRAM).c 36 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 37 | 38 | clean: 39 | $(RM) $(CC)_* *.o *~ $(TARGETS) 40 | 41 | -------------------------------------------------------------------------------- /02-beginners/matmul/README.rst: -------------------------------------------------------------------------------- 1 | Matrix Multiplication 2 | --------------------- 3 | 4 | .. highlight:: c 5 | 6 | This example performs the multiplication of two matrices (A and B) into a third one (C). Since 7 | the code is not optimized, not very good performance results are expected. Think about how to 8 | parallelize (using OmpSs) the following code found in compute() function:: 9 | 10 | for (i = 0; i < DIM; i++) 11 | for (j = 0; j < DIM; j++) 12 | for (k = 0; k < DIM; k++) 13 | matmul ((double *)A[i][k], (double *)B[k][j], (double *)C[i][j], NB); 14 | 15 | This time you are on your own: you have to identify what code must be a task. There are a few 16 | hints and that you may consider before do the exercise: 17 | 18 | * Have a look at the compute function. It is the one that the main procedure calls to perform 19 | the multiplication. As you can see, this algorithm operates on blocks (to increase memory 20 | locality and to parallelize operations on those blocks). 21 | * Now go to the matmul function. 
As you can see, this function performs the multiplication on 22 | a block level. 23 | * When creating tasks do not forget to ensure that all of them have finished before returning 24 | the result of the matrix multiplication (would it be necessary any synchronization directive 25 | to guarantee that result has been already computed?). 26 | 27 | **Goals of this exercise** 28 | 29 | * Look for candidates to become a task and taskify them 30 | * Include synchroniztion directives when required 31 | * Check scalability (for different versions), use different runtime options (schedulers,... ) 32 | * Get a task dependency graph and/or paraver traces 33 | 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /02-beginners/multisort/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=multisort-p 2 | 3 | export IFS=";" 4 | 5 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12" 6 | VSIZE="65536" 7 | SEQ_SORT="256" 8 | SEQ_MERGE="512" 9 | 10 | for size in $VSIZE; do 11 | for seq_sort in $SEQ_SORT; do 12 | for seq_merge in $SEQ_MERGE; do 13 | for thread in $THREADS; do 14 | NX_GPUS=0 NX_SMP_WORKERS=$thread ./$PROGRAM $size $seq_sort $seq_merge 15 | done 16 | done 17 | done 18 | done 19 | -------------------------------------------------------------------------------- /02-beginners/multisort/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=multisort-p 2 | 3 | export NX_SMP_WORKERS=4 4 | 5 | ./$PROGRAM 65536 256 512 6 | 7 | -------------------------------------------------------------------------------- /02-beginners/multisort/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=multisort 2 | PREFIX=. 3 | 4 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 5 | 6 | JOB_SCHED_VERSION=-smp 7 | 8 | BASE_DIR=../.. 
9 | include $(BASE_DIR)/common-files/Makefile 10 | 11 | CC = mcc 12 | 13 | CFLAGS = --ompss --no-copy-deps 14 | CFLAGS_P = 15 | CFLAGS_I = --instrument 16 | CFLAGS_D = --debug 17 | 18 | LIBS = 19 | INCS = 20 | 21 | EXTRA = -std=c99 -O3 -Wall -Wno-unused 22 | 23 | RM = rm -f 24 | 25 | all: $(TARGETS) 26 | 27 | $(PROGRAM)-p: $(PREFIX)/$(PROGRAM).c 28 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 29 | 30 | $(PROGRAM)-i: $(PREFIX)/$(PROGRAM).c 31 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 32 | 33 | $(PROGRAM)-d: $(PREFIX)/$(PROGRAM).c 34 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) 35 | 36 | clean: 37 | $(RM) $(CC)_* *.o *~ $(TARGETS) 38 | -------------------------------------------------------------------------------- /03-gpu-devices/INTRO.rst: -------------------------------------------------------------------------------- 1 | Introduction 2 | ============ 3 | 4 | Almost all the programs in this section is available both in OpenCL and CUDA. From the point of 5 | view of an OmpSs programmer, the only difference between them is the language in which the kernel 6 | is written. 7 | 8 | As OmpSs abstracts the user from doing the work in the host part of the code. Both OpenCL and CUDA 9 | have the same syntax. You can do any of the two versions, as they are basically the same, when you 10 | got one of them working, same steps can be done in the other version. 11 | 12 | -------------------------------------------------------------------------------- /03-gpu-devices/README.rst: -------------------------------------------------------------------------------- 1 | GPU Device Exercises 2 | ******************** 3 | 4 | .. 
toctree:: 5 | :maxdepth: 2 6 | :numbered: 7 | 8 | INTRO.rst 9 | saxpy-cuda/README.rst 10 | krist-cuda/README.rst 11 | matmul-cuda/README.rst 12 | nbody-cuda/README.rst 13 | cholesky-cuda/README.rst 14 | -------------------------------------------------------------------------------- /03-gpu-devices/cholesky-cuda/.config/cuda_potrf.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_POTRF_H_ 2 | #define _CUDA_POTRF_H_ 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C"{ 8 | #endif 9 | 10 | int 11 | cuda_dpotrf(cublasHandle_t handle, char uplo, int n, 12 | double *dA, int ldda, int *info); 13 | 14 | int 15 | cuda_spotrf(cublasHandle_t handle, char uplo, int n, 16 | float *dA, int ldda, int *info); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | 22 | #endif // _CUDA_POTRF_H_ -------------------------------------------------------------------------------- /03-gpu-devices/cholesky-cuda/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=cholesky_hyb-p 2 | 3 | export NX_SMP_WORKERS=1 4 | 5 | export NX_GPUMAXMEM=90 6 | 7 | # Executing the application 8 | for gpus in 1 2 ; do 9 | export NX_GPUS=$gpus 10 | ./$PROGRAM 16384 2048 0 11 | done 12 | 13 | -------------------------------------------------------------------------------- /03-gpu-devices/cholesky-cuda/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=cholesky_hyb-p 2 | 3 | export NX_SMP_WORKERS=1 4 | export NX_GPUS=2 #change this in order to use more GPUs 5 | 6 | export NX_GPUMAXMEM=90 7 | 8 | # Executing the application 9 | ./$PROGRAM 16384 2048 0 10 | -------------------------------------------------------------------------------- /03-gpu-devices/cholesky-cuda/.config/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #@ wall_clock_limit = 00:20:00 3 | #@ initialdir = . 
4 | #@ error = cholesky_%j.err 5 | #@ output = cholesky_%j.out 6 | #@ total_tasks = 1 7 | #@ cpus_per_task = 12 8 | #@ gpus_per_node = 2 9 | 10 | export LD_LIBRARY_PATH=/opt/compilers/intel/mkl/lib/intel64/:$LD_LIBRARY_PATH 11 | export NX_SMP_WORKERS=1 12 | 13 | for gpus in 1 2 ; do 14 | echo "Number of gpus = $gpus" 15 | export NX_GPUS=$gpus 16 | ./cholesky_hyb 16384 2048 0 17 | echo " " 18 | done 19 | 20 | -------------------------------------------------------------------------------- /03-gpu-devices/cholesky-cuda/.config/run_trace.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #@ wall_clock_limit = 00:20:00 3 | #@ initialdir = . 4 | #@ error = cholesky_%j.err 5 | #@ output = cholesky_%j.out 6 | #@ total_tasks = 1 7 | #@ cpus_per_task = 12 8 | #@ gpus_per_node = 2 9 | 10 | 11 | 12 | export NX_INSTRUMENTATION=extrae 13 | export LD_LIBRARY_PATH=/opt/compilers/intel/mkl/lib/intel64/:$LD_LIBRARY_PATH 14 | export EXTRAE_CONFIG_FILE=../../extrae.xml 15 | 16 | for gpus in 1 2 ; do 17 | echo "Number of gpus = $gpus" 18 | export NX_GPUS=$gpus 19 | ./cholesky_hyb 16384 2048 0 20 | echo " " 21 | done 22 | 23 | -------------------------------------------------------------------------------- /03-gpu-devices/cholesky-cuda/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=cholesky_hyb 2 | KERNEL=cuda_potrf 3 | PREFIX=. 4 | 5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 6 | 7 | JOB_SCHED_VERSION=-smp 8 | 9 | BASE_DIR=../.. 
10 | include $(BASE_DIR)/common-files/Makefile 11 | 12 | CC = mcc 13 | KC = nvcc 14 | 15 | CFLAGS = --ompss --cuda 16 | CFLAGS_P = 17 | CFLAGS_I = --instrument 18 | CFLAGS_D = --debug 19 | 20 | NVCFLAGS = -O3 -arch=sm_20 21 | 22 | 23 | LIBS = --Wl,-L$(MKL_LIB_DIR) -lmkl_sequential -lmkl_core -lmkl_rt -lpthread 24 | INCS = -I$(PREFIX) -I$(MKL_INC_DIR) 25 | 26 | EXTRA = -O3 -Wall -Wno-unused 27 | 28 | $(PROGRAM)-p: $(PROGRAM)-p.o $(KERNEL).o $(MKL_CHECK) 29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o 30 | 31 | $(PROGRAM)-i: $(PROGRAM)-i.o $(KERNEL).o $(MKL_CHECK) 32 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o 33 | 34 | $(PROGRAM)-d: $(PROGRAM)-d.o $(KERNEL).o $(MKL_CHECK) 35 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o 36 | 37 | 38 | $(PROGRAM)-p.o: $(PREFIX)/$(PROGRAM).c $(PREFIX)/$(KERNEL).h 39 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ -c $< 40 | 41 | $(PROGRAM)-i.o: $(PREFIX)/$(PROGRAM).c $(PREFIX)/$(KERNEL).h 42 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ -c $< 43 | 44 | $(PROGRAM)-d.o: $(PREFIX)/$(PROGRAM).c $(PREFIX)/$(KERNEL).h 45 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ -c $< 46 | 47 | 48 | $(KERNEL).o: $(KERNEL).cu $(PREFIX)/$(KERNEL).h 49 | $(KC) $(NVCFLAGS) $(INCS) -o $@ -c $< 50 | 51 | .c.o: 52 | $(CC) --no-openmp $(EXTRA) -c $< 53 | 54 | clean: 55 | rm -f $(CC)_* *.o *~ $(TARGETS) 56 | 57 | -------------------------------------------------------------------------------- /03-gpu-devices/cholesky-cuda/README.rst: -------------------------------------------------------------------------------- 1 | Cholesky kernel 2 | --------------- 3 | 4 | This kernel is just like the SMP version found in the examples, but implemented 5 | in CUDA. It uses CUBLAS kernels for the ``syrk``, ``trsm`` and ``gemm`` 6 | kernels, and a CUDA implementation for the potrf kernel (declared in a 7 | different file). 
8 | 9 | Your assignment is to annotate all CUDA tasks in the source code under the 10 | section "TASKS FOR CHOLESKY". 11 | 12 | -------------------------------------------------------------------------------- /03-gpu-devices/cholesky-cuda/cuda_potrf.h: -------------------------------------------------------------------------------- 1 | #ifndef _CUDA_POTRF_H_ 2 | #define _CUDA_POTRF_H_ 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C"{ 8 | #endif 9 | 10 | int 11 | cuda_dpotrf(cublasHandle_t handle, char uplo, int n, 12 | double *dA, int ldda, int *info); 13 | 14 | int 15 | cuda_spotrf(cublasHandle_t handle, char uplo, int n, 16 | float *dA, int ldda, int *info); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | 22 | #endif // _CUDA_POTRF_H_ -------------------------------------------------------------------------------- /03-gpu-devices/krist-cuda/.config/krist.h: -------------------------------------------------------------------------------- 1 | #define DIM2_H 4 2 | #define DIM2_A 4 3 | #define DIM2_E 2 4 | 5 | #if DIM2_H == 4 6 | #define TYPE_H float4 7 | #endif 8 | #if DIM2_H == 3 9 | #define TYPE_H float3 10 | #endif 11 | 12 | #if DIM2_A == 4 13 | #define TYPE_A float4 14 | #endif 15 | #if DIM2_A == 3 16 | #define TYPE_A float3 17 | #endif 18 | 19 | #if DIM2_E == 4 20 | #define TYPE_E float4 21 | #endif 22 | #if DIM2_E == 3 23 | #define TYPE_E float3 24 | #endif 25 | #if DIM2_E == 2 26 | #define TYPE_E float2 27 | #endif 28 | 29 | #ifdef __cplusplus 30 | extern "C" 31 | { 32 | #endif 33 | 34 | #pragma omp target device(cuda) copy_deps ndrange(1,nr,128) 35 | #pragma omp task in([NA] a, [NH] h) out([NE] E) 36 | __global__ void cstructfac(int na, int nr, int nc, float f2, int NA, 37 | TYPE_A*a, int NH, TYPE_H* h, int NE, TYPE_E*E); 38 | 39 | #ifdef __cplusplus 40 | } 41 | #endif 42 | -------------------------------------------------------------------------------- /03-gpu-devices/krist-cuda/.config/multirun.sh: 
-------------------------------------------------------------------------------- 1 | PROGRAM=krist-p 2 | 3 | export NX_GPUMAXMEM=90 4 | 5 | export IFS=";" 6 | 7 | GPUS="1" 8 | ATOMS="1000;2000;3000;" 9 | REFLECTIONS="2000" 10 | 11 | for atoms in $ATOMS; do 12 | for reflections in $REFLECTIONS; do 13 | for NX_GPUS in $GPUS; do 14 | ./$PROGRAM $atoms $reflections 15 | done 16 | done 17 | done 18 | 19 | -------------------------------------------------------------------------------- /03-gpu-devices/krist-cuda/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=krist-p 2 | 3 | export NX_GPUMAXMEM=90 4 | export NX_GPUS=1 #change this in order to use more GPUs 5 | 6 | ./$PROGRAM 1000 2000 --serial 7 | 8 | -------------------------------------------------------------------------------- /03-gpu-devices/krist-cuda/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=krist 2 | KERNEL=kernel 3 | PREFIX=. 4 | 5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 6 | 7 | JOB_SCHED_VERSION=-smp 8 | 9 | BASE_DIR=../.. 
10 | include $(BASE_DIR)/common-files/Makefile 11 | 12 | CC = mcc 13 | KC = nvcc 14 | 15 | CFLAGS = --ompss --cuda 16 | CFLAGS_P = 17 | CFLAGS_I = --instrument 18 | CFLAGS_D = --debug 19 | 20 | NVCFLAGS = -O3 21 | 22 | LIBS = 23 | INCS = -I$(PREFIX) 24 | 25 | EXTRA = -O3 -Wall -Wno-unused 26 | 27 | $(PROGRAM)-p: $(PROGRAM)-p.o $(KERNEL).o clocks.o 28 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o clocks.o 29 | 30 | $(PROGRAM)-i: $(PROGRAM)-i.o $(KERNEL).o clocks.o 31 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o clocks.o 32 | 33 | $(PROGRAM)-d: $(PROGRAM)-d.o $(KERNEL).o clocks.o 34 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o clocks.o 35 | 36 | 37 | $(PROGRAM)-p.o: $(PROGRAM).c $(PREFIX)/krist.h 38 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ -c $< 39 | 40 | $(PROGRAM)-i.o: $(PROGRAM).c $(PREFIX)/krist.h 41 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ -c $< 42 | 43 | $(PROGRAM)-d.o: $(PROGRAM).c $(PREFIX)/krist.h 44 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ -c $< 45 | 46 | 47 | $(KERNEL).o: $(KERNEL).cu $(PREFIX)/krist.h 48 | $(KC) $(INCS) -o $@ -c $< 49 | 50 | .c.o: 51 | $(CC) --no-openmp $(EXTRA) -c $< 52 | 53 | clean: 54 | rm -f $(CC)_* *.o *~ $(TARGETS) 55 | 56 | -------------------------------------------------------------------------------- /03-gpu-devices/krist-cuda/clocks.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "unistd.h" 6 | double cputime() /* aix, xlf */ 7 | { 8 | struct tms b; 9 | clock_t r; 10 | times( &b); 11 | r = b.tms_utime + b.tms_stime; 12 | return ( (double) r/(double) sysconf(_SC_CLK_TCK)); 13 | } 14 | double CPUTIME() /* cray */ 15 | { 16 | return ( cputime()); 17 | } 18 | double cputime_() /* g77, gcc */ 19 | { 20 | return ( cputime()); 21 | } 22 | 23 | double wallclock() 24 | { 25 | 
struct timeval toot; 26 | //struct timezone prut; 27 | double r; 28 | 29 | //gettimeofday(&toot,&prut); 30 | gettimeofday(&toot, NULL); 31 | r=toot.tv_sec+0.000001*(double)toot.tv_usec; 32 | return(r); 33 | } 34 | double WALLCLOCK() 35 | { 36 | return (wallclock()); 37 | } 38 | double wallclock_() 39 | { 40 | return wallclock(); 41 | } 42 | 43 | void fortransleep(int *i) 44 | { 45 | sleep(*i); 46 | } 47 | 48 | void FORTRANSLEEP(int *i) 49 | { 50 | sleep(*i); 51 | } 52 | 53 | void fortransleep_(int *i) 54 | { 55 | sleep(*i); 56 | } 57 | 58 | -------------------------------------------------------------------------------- /03-gpu-devices/krist-cuda/kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 8 | __global__ void cstructfac(int na, int nr, int nc, float f2, int NA, 9 | TYPE_A*a, int NH, TYPE_H* h, int NE, TYPE_E*E) 10 | { 11 | __shared__ TYPE_A ashared[(16384-2048)/sizeof(TYPE_A)]; 12 | int a_start; 13 | 14 | int i = blockDim.x * blockIdx.x + threadIdx.x; 15 | if (i < nr) E[i].x = E[i].y = 0.0f; 16 | 17 | for (a_start = 0; a_start < na; a_start += nc) { 18 | int a_end = min(a_start + nc, na); 19 | int k = threadIdx.x; 20 | while (k < a_end - a_start) { 21 | ashared[k] = a[k + a_start]; 22 | k += blockDim.x; 23 | } 24 | 25 | __syncthreads(); 26 | 27 | if (i < nr) { 28 | int j; 29 | float A,B; 30 | const float twopi = 6.28318584f; 31 | 32 | TYPE_H hi = h[i]; 33 | A = 0.0f; 34 | B = 0.0f; 35 | 36 | int jmax = a_end - a_start; 37 | for (j=0; j < jmax; j++) { 38 | float A1,B1; 39 | float4 aj = ashared[j]; 40 | float arg = twopi*(hi.x*aj.y + 41 | hi.y*aj.z + 42 | hi.z*aj.w); 43 | sincosf(arg, &B1, &A1); 44 | A += aj.x*A1; 45 | B += aj.x*B1; 46 | } 47 | E[i].x += A*f2; 48 | E[i].y += B*f2; 49 | } 50 | __syncthreads(); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /03-gpu-devices/krist-cuda/krist.h: 
-------------------------------------------------------------------------------- 1 | 2 | #define DIM2_H 4 3 | #define DIM2_A 4 4 | #define DIM2_E 2 5 | 6 | #if DIM2_H == 4 7 | #define TYPE_H float4 8 | #endif 9 | #if DIM2_H == 3 10 | #define TYPE_H float3 11 | #endif 12 | #if DIM2_A == 4 13 | #define TYPE_A float4 14 | #endif 15 | #if DIM2_A == 3 16 | #define TYPE_A float3 17 | #endif 18 | #if DIM2_E == 4 19 | #define TYPE_E float4 20 | #endif 21 | #if DIM2_E == 3 22 | #define TYPE_E float3 23 | #endif 24 | #if DIM2_E == 2 25 | #define TYPE_E float2 26 | #endif 27 | 28 | #ifdef __cplusplus 29 | extern "C" 30 | { 31 | #endif 32 | 33 | #pragma omp target device(cuda) copy_deps ndrange(/*???*/) 34 | #pragma omp task /* in and outs? */ 35 | __global__ void cstructfac(int na, int nr, int nc, float f2, int NA, 36 | TYPE_A*a, int NH, TYPE_H* h, int NE, TYPE_E*E); 37 | 38 | #ifdef __cplusplus 39 | } 40 | #endif 41 | -------------------------------------------------------------------------------- /03-gpu-devices/krist-opencl/.config/krist.h: -------------------------------------------------------------------------------- 1 | 2 | #define DIM2_H 4 3 | #define DIM2_A 4 4 | #define DIM2_E 2 5 | 6 | #if DIM2_H == 4 7 | #define TYPE_H float4 8 | #endif 9 | #if DIM2_H == 3 10 | #define TYPE_H float3 11 | #endif 12 | #if DIM2_A == 4 13 | #define TYPE_A float4 14 | #endif 15 | #if DIM2_A == 3 16 | #define TYPE_A float3 17 | #endif 18 | #if DIM2_E == 4 19 | #define TYPE_E float4 20 | #endif 21 | #if DIM2_E == 3 22 | #define TYPE_E float3 23 | #endif 24 | #if DIM2_E == 2 25 | #define TYPE_E float2 26 | #endif 27 | 28 | #ifndef __OPENCL_VERSION__ 29 | #pragma omp target device(opencl) copy_deps ndrange(1,nr,128) 30 | #pragma omp task in([NA] a, [NH] h) out([NE] E) 31 | __kernel void cstructfac(int na, int nr, int nc, float f2, 32 | int NA, __global float* a, int NH , __global float* h, int NE, __global float* E); 33 | #endif 34 | 35 | 
-------------------------------------------------------------------------------- /03-gpu-devices/krist-opencl/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=krist-p 2 | 3 | export NX_OPENCL_MAX_DEVICES=2 #max number of opencl devices (GPUs in this case) to use 4 | 5 | ./$PROGRAM 10000 20000 6 | 7 | -------------------------------------------------------------------------------- /03-gpu-devices/krist-opencl/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=krist-p 2 | 3 | export NX_OPENCL_MAX_DEVICES=2 #max number of opencl devices (GPUs in this case) to use 4 | 5 | ./$PROGRAM 1000 2000 6 | 7 | -------------------------------------------------------------------------------- /03-gpu-devices/krist-opencl/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=krist 2 | KERNEL=kernel 3 | PREFIX=. 4 | 5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 6 | 7 | JOB_SCHED_VERSION=-smp 8 | 9 | BASE_DIR=../.. 
10 | include $(BASE_DIR)/common-files/Makefile 11 | 12 | CC = mcc 13 | 14 | CFLAGS = --ompss --opencl 15 | CFLAGS_P = 16 | CFLAGS_I = --instrument 17 | CFLAGS_D = --debug 18 | 19 | LIBS = 20 | INCS = -I$(PREFIX) --opencl-build-opts=-I$(PREFIX) 21 | 22 | EXTRA = -O3 -Wall -Wno-unused 23 | 24 | $(PROGRAM)-p: $(PROGRAM).c $(KERNEL).cl clocks.o 25 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl clocks.o 26 | 27 | $(PROGRAM)-i: $(PROGRAM).c $(KERNEL).cl clocks.o 28 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl clocks.o 29 | 30 | $(PROGRAM)-d: $(PROGRAM).c $(KERNEL).cl clocks.o 31 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl clocks.o 32 | 33 | .c.o: 34 | $(CC) --no-openmp $(EXTRA) -c $< 35 | 36 | clean: 37 | rm -f $(CC)_* *.o *~ $(TARGETS) 38 | 39 | -------------------------------------------------------------------------------- /03-gpu-devices/krist-opencl/README.rst: -------------------------------------------------------------------------------- 1 | ../krist-cuda/README.rst -------------------------------------------------------------------------------- /03-gpu-devices/krist-opencl/clocks.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "unistd.h" 6 | double cputime() /* aix, xlf */ 7 | { 8 | struct tms b; 9 | clock_t r; 10 | times( &b); 11 | r = b.tms_utime + b.tms_stime; 12 | return ( (double) r/(double) sysconf(_SC_CLK_TCK)); 13 | } 14 | double CPUTIME() /* cray */ 15 | { 16 | return ( cputime()); 17 | } 18 | double cputime_() /* g77, gcc */ 19 | { 20 | return ( cputime()); 21 | } 22 | 23 | double wallclock() 24 | { 25 | struct timeval toot; 26 | //struct timezone prut; 27 | double r; 28 | 29 | //gettimeofday(&toot,&prut); 30 | gettimeofday(&toot, NULL); 31 | r=toot.tv_sec+0.000001*(double)toot.tv_usec; 32 | return(r); 33 | } 34 | double 
WALLCLOCK() 35 | { 36 | return (wallclock()); 37 | } 38 | double wallclock_() 39 | { 40 | return wallclock(); 41 | } 42 | 43 | void fortransleep(int *i) 44 | { 45 | sleep(*i); 46 | } 47 | 48 | void FORTRANSLEEP(int *i) 49 | { 50 | sleep(*i); 51 | } 52 | 53 | void fortransleep_(int *i) 54 | { 55 | sleep(*i); 56 | } 57 | 58 | -------------------------------------------------------------------------------- /03-gpu-devices/krist-opencl/kernel.cl: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | 4 | #ifdef cl_khr_fp64 5 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable 6 | #elif defined(cl_amd_fp64) 7 | #pragma OPENCL EXTENSION cl_amd_fp64 : enable 8 | #else 9 | #error "Double precision floating point not supported by OpenCL implementation." 10 | #endif 11 | 12 | __kernel void cstructfac(int na, int nr, int nc, float f2, int NA, 13 | __global TYPE_A* a,int NH, __global TYPE_H* h,int NE,__global TYPE_E* E) 14 | { 15 | __local TYPE_A ashared[(16384-2048)/(sizeof(TYPE_A))]; 16 | int a_start; 17 | 18 | int i = get_global_id(0); 19 | if (i < nr) E[i].x = E[i].y = 0.0f; 20 | 21 | for (a_start = 0; a_start < na; a_start += nc) { 22 | int a_end = min(a_start + nc, na); 23 | int k = get_local_id(0); 24 | while (k < a_end - a_start) { 25 | ashared[k] = a[k + a_start]; 26 | k += get_local_size(0); 27 | } 28 | 29 | barrier(CLK_LOCAL_MEM_FENCE); 30 | 31 | if (i < nr) { 32 | int j; 33 | float A,B; 34 | const float twopi = 6.28318584f; 35 | 36 | TYPE_H hi = h[i]; 37 | A = 0.0f; 38 | B = 0.0f; 39 | 40 | int jmax = a_end - a_start; 41 | for (j=0; j < jmax; j++) { 42 | float A1,B1; 43 | float4 aj = ashared[j]; 44 | float arg = twopi*(hi.x*aj.y + 45 | hi.y*aj.z + 46 | hi.z*aj.w); 47 | B1=sincos(arg, &A1); 48 | A += aj.x*A1; 49 | B += aj.x*B1; 50 | } 51 | E[i].x += A*f2; 52 | E[i].y += B*f2; 53 | } 54 | barrier(CLK_LOCAL_MEM_FENCE); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- 
/03-gpu-devices/krist-opencl/krist.h: -------------------------------------------------------------------------------- 1 | 2 | #define DIM2_H 4 3 | #define DIM2_A 4 4 | #define DIM2_E 2 5 | 6 | #if DIM2_H == 4 7 | #define TYPE_H float4 8 | #endif 9 | #if DIM2_H == 3 10 | #define TYPE_H float3 11 | #endif 12 | #if DIM2_A == 4 13 | #define TYPE_A float4 14 | #endif 15 | #if DIM2_A == 3 16 | #define TYPE_A float3 17 | #endif 18 | #if DIM2_E == 4 19 | #define TYPE_E float4 20 | #endif 21 | #if DIM2_E == 3 22 | #define TYPE_E float3 23 | #endif 24 | #if DIM2_E == 2 25 | #define TYPE_E float2 26 | #endif 27 | 28 | #ifndef __OPENCL_VERSION__ 29 | #pragma omp target device(opencl) copy_deps ndrange(/*???*/) 30 | #pragma omp task /* in and outs? */ 31 | __kernel void cstructfac(int na, int nr, int nc, float f2, 32 | int NA, __global float* a, int NH , __global float* h, int NE, __global float* E); 33 | #endif 34 | 35 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-cuda/.config/kernel.h: -------------------------------------------------------------------------------- 1 | 2 | // Thread block size 3 | #define BLOCK_SIZE 16 4 | 5 | 6 | #ifdef DP 7 | #define REAL double 8 | #else 9 | #define REAL float 10 | #endif 11 | 12 | #ifdef __cplusplus 13 | extern "C" 14 | { 15 | #endif 16 | 17 | //Kernel declaration as a task should be here 18 | //Remember, we want to multiply two matrices, (A*B=C) where all of them have size NB*NB 19 | #pragma omp target device(cuda) ndrange(2,NB,NB,16,16) copy_deps 20 | #pragma omp task inout([NB*NB]C) in([NB*NB]A,[NB*NB]B) 21 | __global__ void Muld(REAL* A, REAL* B, int wA, int wB, REAL* C,int NB); 22 | 23 | #ifdef __cplusplus 24 | } 25 | #endif 26 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-cuda/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=matmul-p 2 | 3 | export 
IFS=";" 4 | 5 | export NX_GPUMAXMEM=90 6 | 7 | GPUS="01;02" 8 | SIZES="8192" 9 | 10 | for size in $SIZES; do 11 | # Creating the input file 12 | touch test.in 13 | echo "$size $size $size 3" > test.in 14 | for gpu in $GPUS; do 15 | # Executing the application 16 | NX_GPUS=$gpu NX_SMP_WORKERS=1 ./$PROGRAM 17 | done 18 | done 19 | 20 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-cuda/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=matmul-p 2 | 3 | export NX_SMP_WORKERS=1 4 | export NX_GPUS=2 #change this in order to use more GPUs 5 | 6 | export NX_GPUMAXMEM=90 7 | 8 | # Creating the input file 9 | touch test.in 10 | echo "8192 8192 8192 3" > test.in 11 | 12 | # Executing the application 13 | ./$PROGRAM 14 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-cuda/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=matmul 2 | KERNEL=kernel 3 | PREFIX=. 4 | 5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 6 | 7 | JOB_SCHED_VERSION=-smp 8 | 9 | BASE_DIR=../.. 
10 | include $(BASE_DIR)/common-files/Makefile 11 | 12 | CC = mcc 13 | KC = nvcc 14 | 15 | CFLAGS = --ompss --cuda 16 | CFLAGS_P = 17 | CFLAGS_I = --instrument 18 | CFLAGS_D = --debug 19 | 20 | NVCFLAGS = -O3 21 | 22 | LIBS = 23 | INCS = -I$(PREFIX) 24 | 25 | PRECISION=-DDP 26 | EXTRA = -O3 -Wall -Wno-unused 27 | OBJECTS= check.o gendat.o prtspeed.o cclock.o 28 | 29 | $(PROGRAM)-p: $(PROGRAM)-p.o $(KERNEL).o $(OBJECTS) 30 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS) 31 | 32 | $(PROGRAM)-i: $(PROGRAM)-i.o $(KERNEL).o $(OBJECTS) 33 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS) 34 | 35 | $(PROGRAM)-d: $(PROGRAM)-d.o $(KERNEL).o $(OBJECTS) 36 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS) 37 | 38 | 39 | $(PROGRAM)-p.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h 40 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(PRECISION) $(INCS) -o $@ -c $< 41 | 42 | $(PROGRAM)-i.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h 43 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(PRECISION) $(INCS) -o $@ -c $< 44 | 45 | $(PROGRAM)-d.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h 46 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(PRECISION) $(INCS) -o $@ -c $< 47 | 48 | 49 | $(KERNEL).o: $(KERNEL).cu $(PREFIX)/$(KERNEL).h 50 | $(KC) $(INCS) $(PRECISION) -o $@ -c $< 51 | 52 | .c.o: 53 | $(CC) --no-openmp $(EXTRA) $(PRECISION) -c $< 54 | 55 | clean: 56 | rm -f $(CC)_* *.o *~ $(TARGETS) 57 | rm -f test.in 58 | 59 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-cuda/cclock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* ------------------------------------------------------------------- 6 | 7 | This function returns the wall clock time with micro seconds 8 | accuracy. 9 | The data type of the returned value is "double". 
10 | 11 | The function can be called from a FORTRAN module. The value 12 | returned by cclock_ and cclock should be of type REAL(Kind = 8). 13 | 14 | ------------------------------------------------------------------- 15 | */ 16 | 17 | double cclock_( void ) 18 | { 19 | const double micro = 1.0e-06; /* Conversion constant */ 20 | static long start = 0L, startu; 21 | struct timeval tp; /* Structure used by gettimeofday */ 22 | double wall_time; /* To hold the result */ 23 | 24 | 25 | if ( gettimeofday( &tp, NULL) == -1 ) 26 | wall_time = -1.0e0; 27 | else if( !start ) { 28 | start = tp.tv_sec; 29 | startu = tp.tv_usec; 30 | wall_time = 0.0e0; 31 | } 32 | else 33 | wall_time = (double) (tp.tv_sec - start) + micro*(tp.tv_usec - startu); 34 | 35 | return wall_time; 36 | } 37 | 38 | 39 | double cclock( void ) 40 | { 41 | return cclock_(); 42 | } 43 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-cuda/check.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "driver.h" 6 | 7 | #ifdef DP 8 | #define REAL double 9 | #else 10 | #define REAL float 11 | #endif 12 | 13 | //#define BSIZE 1024 14 | 15 | int check( int nrep, int m, int l, int n, int mDIM, int nDIM, REAL **c/*[][nDIM*BSIZE] */) 16 | { 17 | double eps, tvalue = (double)l; 18 | int i, j, k, o, ok = 0; 19 | 20 | eps = 2.0*l*l*DBL_EPSILON; 21 | int perfectM = m / BSIZE; 22 | int perfectN = n / BSIZE; 23 | 24 | int leftOutM = m % BSIZE; 25 | int leftOutN = n % BSIZE; 26 | 27 | for(i=0;i perfectM && k >= leftOutM ) 32 | break; 33 | else if( j == nDIM-1 && nDIM > perfectN && o >= leftOutN ) 34 | break; 35 | else { 36 | if ( fabs( tvalue - (c[i*nDIM+j][k*BSIZE+o]/nrep) ) > eps ) { 37 | ok++; 38 | //printf("Bad result at [%d][%d] : expected %f but found %f\n", i*nDIM+j, k*BSIZE+o, tvalue, c[i*nDIM+j][k*BSIZE+o]); 39 | } 40 | } 41 | } 42 | } 43 | } 44 | } 45 | 46 | return( ok ); 
47 | } 48 | 49 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-cuda/driver.h: -------------------------------------------------------------------------------- 1 | //#define BSIZE 512 2 | #define BSIZE 1024 3 | //#define BSIZE 2048 4 | //#define BSIZE 4096 5 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-cuda/kernel.h: -------------------------------------------------------------------------------- 1 | 2 | // Thread block size 3 | #define BLOCK_SIZE 16 4 | 5 | #ifdef DP 6 | #define REAL double 7 | #else 8 | #define REAL float 9 | #endif 10 | 11 | #ifdef __cplusplus 12 | extern "C" 13 | { 14 | #endif 15 | 16 | //Kernel declaration as a task should be here 17 | //Remember, we want to multiply two matrices, (A*B=C) where all of them have size NB*NB 18 | 19 | #ifdef __cplusplus 20 | } 21 | #endif 22 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-cuda/prtspeed.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define max(a,b)( ((a) > (b)) ? 
(a) : (b) ) 4 | #if 0 5 | void prtspeed( int m, int l, int n, double time, int ok, unsigned long nops ) 6 | { 7 | double speed; 8 | // ----------------------------------------------------------------- 9 | //speed = 1.0e-9*2*m*l*n/max( time, 1.0e-9 ); 10 | speed = 1.0e-9*nops/max( time, 1.0e-9 ); 11 | 12 | printf( "%4d |%4d | %4d| %11.4lf | %11.4lf | ", m, l, n, time, speed ); 13 | if ( ok == 0 ) 14 | printf( " T |\n" ); 15 | else 16 | printf( " F (%d)|\n", ok ); 17 | } 18 | #else 19 | 20 | void prtspeed( int m, int l, int n, int nb, double time, int ok, unsigned long nops ) 21 | { 22 | double speed = 1.0e-9*nops/time; 23 | printf("Matrix size: %dx%d\n", m, n); 24 | printf("Block size: %dx%d\n", nb, nb); 25 | #ifdef DP 26 | printf("Precision type: Double\n"); 27 | #else 28 | printf("Precision type: Simple\n"); 29 | #endif 30 | 31 | printf(" GFLOPS : %.4lf\n", speed); 32 | printf(" computation time (in seconds): %.4lf\n", time); 33 | if ( ok == 0 ) { 34 | printf(" Verification: Ok\n"); 35 | } else { 36 | printf(" Verification: Failed (%d)\n", ok); 37 | } 38 | } 39 | #endif 40 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-opencl/.config/kernel.h: -------------------------------------------------------------------------------- 1 | 2 | // Thread block size 3 | #define BLOCK_SIZE 16 4 | //Mercurium pragmas can't "read" values from #defines, so we "save" the value as integer 5 | __constant int BL_SIZE= BLOCK_SIZE; 6 | 7 | 8 | #ifdef DP 9 | #define REAL double 10 | #else 11 | #define REAL float 12 | #endif 13 | 14 | #ifdef __cplusplus 15 | extern "C" 16 | { 17 | #endif 18 | 19 | //Kernel declaration as a task should be here 20 | //Remember, we want to multiply two matrices, (A*B=C) where all of them have size NB*NB 21 | #pragma omp target device(opencl) ndrange(2,NB,NB,BL_SIZE,BL_SIZE) copy_deps 22 | #pragma omp task inout([NB*NB]C) in([NB*NB]A,[NB*NB]B) 23 | __kernel void Muld(__global REAL* A,__global REAL* B, 
int wA, int wB,__global REAL* C,int NB); 24 | 25 | #ifdef __cplusplus 26 | } 27 | #endif 28 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-opencl/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=matmul-p 2 | 3 | export NX_SMP_WORKERS=1 4 | export NX_OPENCL_MAX_DEVICES=2 #max number of opencl devices (GPUs in this case) to use 5 | export NX_OPENCL_DEVICE_TYPE=GPU 6 | 7 | # Creating input file 8 | touch test.in 9 | echo "4096 4096 4096 3" > test.in 10 | 11 | # Executing the program 12 | ./$PROGRAM 13 | 14 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-opencl/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=matmul-p 2 | 3 | export NX_SMP_WORKERS=1 4 | export NX_OPENCL_MAX_DEVICES=2 #max number of opencl devices (GPUs in this case) to use 5 | export NX_OPENCL_DEVICE_TYPE=GPU 6 | 7 | # Creating input file 8 | touch test.in 9 | echo "4096 4096 4096 3" > test.in 10 | 11 | # Executing the program 12 | ./$PROGRAM 13 | 14 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-opencl/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=matmul 2 | KERNEL=kernel 3 | PREFIX=. 4 | 5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 6 | 7 | JOB_SCHED_VERSION=-smp 8 | 9 | BASE_DIR=../.. 
10 | include $(BASE_DIR)/common-files/Makefile 11 | 12 | CC = mcc 13 | 14 | CFLAGS = --ompss --opencl 15 | CFLAGS_P = 16 | CFLAGS_I = --instrument 17 | CFLAGS_D = --debug 18 | 19 | LIBS = 20 | INCS = -I$(PREFIX) --opencl-build-opts=-I$(PREFIX) 21 | 22 | EXTRA = -O3 -Wall -Wno-unused 23 | OBJECTS = cclock.o driver.o prtspeed.o check.o gendat.o 24 | 25 | $(PROGRAM)-p: $(PROGRAM).c $(KERNEL).cl $(OBJECTS) 26 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl $(OBJECTS) 27 | 28 | $(PROGRAM)-i: $(PROGRAM).c $(KERNEL).cl $(OBJECTS) 29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl $(OBJECTS) 30 | 31 | $(PROGRAM)-d: $(PROGRAM).c $(KERNEL).cl $(OBJECTS) 32 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl $(OBJECTS) 33 | 34 | .c.o: 35 | $(CC) --no-openmp $(EXTRA) -c $< 36 | 37 | clean: 38 | rm -f $(CC)_* *.o *~ $(TARGETS) 39 | 40 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-opencl/README.rst: -------------------------------------------------------------------------------- 1 | ../matmul-cuda/README.rst -------------------------------------------------------------------------------- /03-gpu-devices/matmul-opencl/cclock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* ------------------------------------------------------------------- 6 | 7 | This function returns the wall clock time with micro seconds 8 | accuracy. 9 | The data type of the returned value is "double". 10 | 11 | The function can be called from a FORTRAN module. The value 12 | returned by cclock_ and cclock should be of type REAL(Kind = 8). 
13 | 14 | ------------------------------------------------------------------- 15 | */ 16 | 17 | double cclock_( void ) 18 | { 19 | const double micro = 1.0e-06; /* Conversion constant */ 20 | static long start = 0L, startu; 21 | struct timeval tp; /* Structure used by gettimeofday */ 22 | double wall_time; /* To hold the result */ 23 | 24 | 25 | if ( gettimeofday( &tp, NULL) == -1 ) 26 | wall_time = -1.0e0; 27 | else if( !start ) { 28 | start = tp.tv_sec; 29 | startu = tp.tv_usec; 30 | wall_time = 0.0e0; 31 | } 32 | else 33 | wall_time = (double) (tp.tv_sec - start) + micro*(tp.tv_usec - startu); 34 | 35 | return wall_time; 36 | } 37 | 38 | 39 | double cclock( void ) 40 | { 41 | return cclock_(); 42 | } 43 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-opencl/check.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "driver.h" 6 | 7 | #ifdef DP 8 | #define REAL double 9 | #else 10 | #define REAL float 11 | #endif 12 | 13 | //#define BSIZE 1024 14 | 15 | int check( int nrep, int m, int l, int n, int mDIM, int nDIM, REAL **c/*[][nDIM*BSIZE] */) 16 | { 17 | double eps, tvalue = (double)l; 18 | int i, j, k, o, ok = 0; 19 | 20 | eps = 2.0*l*l*DBL_EPSILON; 21 | int perfectM = m / BSIZE; 22 | int perfectN = n / BSIZE; 23 | 24 | int leftOutM = m % BSIZE; 25 | int leftOutN = n % BSIZE; 26 | 27 | for(i=0;i perfectM && k >= leftOutM ) 32 | break; 33 | else if( j == nDIM-1 && nDIM > perfectN && o >= leftOutN ) 34 | break; 35 | else { 36 | if ( fabs( tvalue - (c[i*nDIM+j][k*BSIZE+o]/nrep) ) > eps ) { 37 | ok++; 38 | //printf("Bad result at [%d][%d] : expected %f but found %f\n", i*nDIM+j, k*BSIZE+o, tvalue, c[i*nDIM+j][k*BSIZE+o]); 39 | } 40 | } 41 | } 42 | } 43 | } 44 | } 45 | 46 | return( ok ); 47 | } 48 | 49 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-opencl/driver.h: 
-------------------------------------------------------------------------------- 1 | //#define BSIZE 512 2 | #define BSIZE 1024 3 | //#define BSIZE 2048 4 | //#define BSIZE 4096 5 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-opencl/kernel.h: -------------------------------------------------------------------------------- 1 | 2 | // Thread block size 3 | #define BLOCK_SIZE 16 4 | //Mercurium pragmas can't "read" values from #defines, so we "save" the value as integer 5 | __constant int BL_SIZE= BLOCK_SIZE; 6 | 7 | 8 | #ifdef DP 9 | #define REAL double 10 | #else 11 | #define REAL float 12 | #endif 13 | 14 | #ifdef __cplusplus 15 | extern "C" 16 | { 17 | #endif 18 | 19 | //Kernel declaration as a task should be here 20 | //Remember, we want to multiply two matrices, (A*B=C) where all of them have size NB*NB 21 | 22 | #ifdef __cplusplus 23 | } 24 | #endif 25 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-opencl/matmul.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "driver.h" 7 | 8 | #ifdef DP 9 | #define REAL double 10 | #else 11 | #define REAL float 12 | #endif 13 | 14 | 15 | const int NB = BSIZE; 16 | 17 | 18 | void matmul( int m, int l, int n, int mDIM, int lDIM, int nDIM, REAL **tileA, REAL **tileB, 19 | REAL **tileC ) 20 | { 21 | int i, j, k; 22 | for(i = 0;i < mDIM; i++){ 23 | for (j = 0; j < nDIM; j++){ 24 | for (k = 0; k < lDIM; k++){ 25 | //Kernel call 26 | Muld(tileA[i*lDIM+k], tileB[k*nDIM+j],NB,NB, tileC[i*nDIM+j],NB); 27 | } 28 | } 29 | } 30 | #pragma omp taskwait 31 | } 32 | 33 | 34 | -------------------------------------------------------------------------------- /03-gpu-devices/matmul-opencl/prtspeed.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define max(a,b)( ((a) > 
(b)) ? (a) : (b) ) 4 | #if 0 5 | void prtspeed( int m, int l, int n, double time, int ok, unsigned long nops ) 6 | { 7 | double speed; 8 | // ----------------------------------------------------------------- 9 | //speed = 1.0e-9*2*m*l*n/max( time, 1.0e-9 ); 10 | speed = 1.0e-9*nops/max( time, 1.0e-9 ); 11 | 12 | printf( "%4d |%4d | %4d| %11.4lf | %11.4lf | ", m, l, n, time, speed ); 13 | if ( ok == 0 ) 14 | printf( " T |\n" ); 15 | else 16 | printf( " F (%d)|\n", ok ); 17 | } 18 | #else 19 | 20 | void prtspeed( int m, int l, int n, int nb, double time, int ok, unsigned long nops ) 21 | { 22 | double speed = 1.0e-9*nops/time; 23 | printf("Matrix size: %dx%d\n", m, n); 24 | printf("Block size: %dx%d\n", nb, nb); 25 | #ifdef DP 26 | printf("Precision type: Double\n"); 27 | #else 28 | printf("Precision type: Simple\n"); 29 | #endif 30 | 31 | printf(" GFLOPS : %.4lf\n", speed); 32 | printf(" computation time (in seconds): %.4lf\n", time); 33 | if ( ok == 0 ) { 34 | printf(" Verification: Ok\n"); 35 | } else { 36 | printf(" Verification: Failed (%d)\n", ok); 37 | } 38 | } 39 | #endif 40 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-cuda/.config/kernel.h: -------------------------------------------------------------------------------- 1 | #include"nbody.h" 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | #pragma omp target device(cuda) ndrange(1,size,MAX_NUM_THREADS) copy_deps 8 | #pragma omp task in(d_particles[0;number_of_particles]) out([size] output) 9 | __global__ void calculate_force_func(int size, float time_interval, int number_of_particles, 10 | Particle* d_particles, Particle *output, int first_local, 11 | int last_local); 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-cuda/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | 
PROGRAM=nbody-p 2 | 3 | export NX_SMP_WORKERS=1 4 | 5 | for gpus in 1 2; do 6 | export NX_GPUS=$gpus 7 | NX_ARGS="--cache-policy writethrough --gpu-max-memory 90" ./$PROGRAM nbody_input-16384.in 8 | done 9 | 10 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-cuda/.config/nbody.h: -------------------------------------------------------------------------------- 1 | #ifndef nbody_h 2 | #define nbody_h 3 | 4 | #define gravitational_constant 6.6726e-11 /* N(m/kg)2 */ 5 | 6 | typedef struct { 7 | float position_x; /* m */ 8 | float position_y; /* m */ 9 | float position_z; /* m */ 10 | float velocity_x; /* m/s */ 11 | float velocity_y; /* m/s */ 12 | float velocity_z; /* m/s */ 13 | float mass; /* kg */ 14 | float pad; 15 | } Particle; 16 | 17 | #endif /* #ifndef nbody_h */ 18 | 19 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-cuda/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=nbody-p 2 | 3 | export NX_SMP_WORKERS=1 4 | export NX_GPUS=2 #change this in order to use more GPUs 5 | 6 | NX_ARGS="--cache-policy writethrough --gpu-max-memory 90" ./$PROGRAM nbody_input-16384.in 7 | 8 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-cuda/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=nbody 2 | KERNEL=kernel 3 | PREFIX=. 4 | 5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 6 | 7 | JOB_SCHED_VERSION=-smp 8 | 9 | BASE_DIR=../.. 
10 | include $(BASE_DIR)/common-files/Makefile 11 | 12 | CC = mcc 13 | KC = nvcc 14 | 15 | CFLAGS = --ompss --cuda 16 | CFLAGS_P = 17 | CFLAGS_I = --instrument 18 | CFLAGS_D = --debug 19 | 20 | NVCFLAGS = -O3 21 | 22 | LIBS = 23 | INCS = -I$(PREFIX) 24 | 25 | EXTRA = -O3 -Wall -Wno-unused 26 | OBJECTS= 27 | 28 | $(PROGRAM)-p: $(PROGRAM)-p.o $(KERNEL).o $(OBJECTS) 29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS) 30 | 31 | $(PROGRAM)-i: $(PROGRAM)-i.o $(KERNEL).o $(OBJECTS) 32 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS) 33 | 34 | $(PROGRAM)-d: $(PROGRAM)-d.o $(KERNEL).o $(OBJECTS) 35 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS) 36 | 37 | 38 | $(PROGRAM)-p.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h 39 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ -c $< 40 | 41 | $(PROGRAM)-i.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h 42 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ -c $< 43 | 44 | $(PROGRAM)-d.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h 45 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ -c $< 46 | 47 | 48 | $(KERNEL).o: $(KERNEL).cu $(PREFIX)/$(KERNEL).h 49 | $(KC) $(INCS) -o $@ -c $< 50 | 51 | .c.o: 52 | $(CC) --no-openmp $(EXTRA) -c $< 53 | 54 | clean: 55 | rm -f $(CC)_* *.o *~ $(TARGETS) 56 | 57 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-cuda/kernel.h: -------------------------------------------------------------------------------- 1 | #include"nbody.h" 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | #ifdef __cplusplus 8 | } 9 | #endif 10 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-cuda/nbody.h: -------------------------------------------------------------------------------- 1 | #ifndef nbody_h 2 | #define nbody_h 3 | 4 | #define gravitational_constant 6.6726e-11 /* 
N(m/kg)2 */ 5 | 6 | typedef struct { 7 | float position_x; /* m */ 8 | float position_y; /* m */ 9 | float position_z; /* m */ 10 | float velocity_x; /* m/s */ 11 | float velocity_y; /* m/s */ 12 | float velocity_z; /* m/s */ 13 | float mass; /* kg */ 14 | float pad; 15 | } Particle; 16 | 17 | #endif /* #ifndef nbody_h */ 18 | 19 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-cuda/nbody_input-16384.in: -------------------------------------------------------------------------------- 1 | 16384 2 | 1.0e+10 3 | 1.0e+10 4 | 1.0e+10 5 | 1.0e+0 6 | 10 7 | 11 8 | 12345 9 | 1.0e+28 10 | nbody 11 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-opencl/.config/kernel.h: -------------------------------------------------------------------------------- 1 | #include "../nbody.h" 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | #pragma omp target device(opencl) ndrange(1,size,MAX_NUM_THREADS) copy_deps 8 | #pragma omp task in(d_particles[0;number_of_particles]) out([size] out) 9 | __kernel void calculate_force_func(int size, float time_interval, int number_of_particles, 10 | __global Particle* d_particles,__global Particle *out, 11 | int first_local, int last_local); 12 | #ifdef __cplusplus 13 | } 14 | #endif 15 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-opencl/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=nbody-p 2 | 3 | export NX_SMP_WORKERS=1 4 | export NX_OPENCL_MAX_DEVICES=2 #max number of opencl devices (GPUs in this case) to use 5 | 6 | NX_ARGS="--cache-policy writethrough --gpu-max-memory 1000000000" ./$PROGRAM nbody_input-16384.in 7 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-opencl/.config/run-once.sh: 
-------------------------------------------------------------------------------- 1 | PROGRAM=nbody-p 2 | 3 | export NX_SMP_WORKERS=1 4 | export NX_OPENCL_MAX_DEVICES=2 #max number of opencl devices (GPUs in this case) to use 5 | 6 | NX_ARGS="--cache-policy writethrough --gpu-max-memory 1000000000" ./$PROGRAM nbody_input-16384.in 7 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-opencl/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=nbody 2 | KERNEL=kernel 3 | PREFIX=. 4 | 5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 6 | 7 | JOB_SCHED_VERSION=-smp 8 | 9 | BASE_DIR=../.. 10 | include $(BASE_DIR)/common-files/Makefile 11 | 12 | CC = mcc 13 | 14 | CFLAGS = --ompss -k --opencl 15 | CFLAGS_P = 16 | CFLAGS_I = --instrument 17 | CFLAGS_D = --debug 18 | 19 | LIBS = 20 | INCS = -I$(PREFIX) --opencl-build-opts=-I$(PREFIX) 21 | 22 | EXTRA = -O3 -Wall -Wno-unused 23 | OBJECTS = 24 | 25 | $(PROGRAM)-p: $(PROGRAM).c $(KERNEL).cl 26 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl 27 | 28 | $(PROGRAM)-i: $(PROGRAM).c $(KERNEL).cl 29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl 30 | 31 | $(PROGRAM)-d: $(PROGRAM).c $(KERNEL).cl 32 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl 33 | 34 | .c.o: 35 | $(CC) --no-openmp $(EXTRA) -c $< 36 | 37 | clean: 38 | rm -f $(CC)_* *.o *~ $(TARGETS) 39 | rm -f nbody_out.xyz 40 | 41 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-opencl/README.rst: -------------------------------------------------------------------------------- 1 | ../nbody-cuda/README.rst -------------------------------------------------------------------------------- /03-gpu-devices/nbody-opencl/kernel.c: -------------------------------------------------------------------------------- 1 | 
#include 2 | #include 3 | 4 | const int MAX_NUM_THREADS= 128; 5 | 6 | void Particle_array_calculate_forces_opencl(Particle* this_particle_array, Particle *output_array, int number_of_particles, float time_interval ) { 7 | const int bs = number_of_particles; 8 | int i; 9 | 10 | for ( i = 0; i < number_of_particles; i += bs ) 11 | { 12 | //Calling the kernel 13 | ....(bs,time_interval,number_of_particles,this_particle_array, &output_array[i], i, i+bs-1); 14 | } 15 | } -------------------------------------------------------------------------------- /03-gpu-devices/nbody-opencl/kernel.h: -------------------------------------------------------------------------------- 1 | #include "nbody.h" 2 | 3 | #ifdef __cplusplus 4 | extern "C" { 5 | #endif 6 | 7 | 8 | #ifdef __cplusplus 9 | } 10 | #endif 11 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-opencl/nbody.h: -------------------------------------------------------------------------------- 1 | /* nbody.h */ 2 | 3 | #ifndef nbody_h 4 | #define nbody_h 5 | 6 | #define gravitational_constant 6.6726e-11 /* N(m/kg)2 */ 7 | 8 | typedef struct { 9 | float position_x; /* m */ 10 | float position_y; /* m */ 11 | float position_z; /* m */ 12 | float velocity_x; /* m/s */ 13 | float velocity_y; /* m/s */ 14 | float velocity_z; /* m/s */ 15 | float mass; /* kg */ 16 | float pad; 17 | } Particle; 18 | 19 | __constant int MAX_NUM_THREADS= 128; 20 | 21 | #endif /* #ifndef nbody_h */ 22 | 23 | -------------------------------------------------------------------------------- /03-gpu-devices/nbody-opencl/nbody_input-16384.in: -------------------------------------------------------------------------------- 1 | 16384 2 | 1.0e+10 3 | 1.0e+10 4 | 1.0e+10 5 | 1.0e+0 6 | 10 7 | 11 8 | 12345 9 | 1.0e+28 10 | nbody 11 | -------------------------------------------------------------------------------- /03-gpu-devices/saxpy-cuda/.config/kernel.h: 
-------------------------------------------------------------------------------- 1 | 2 | #ifdef __cplusplus 3 | extern "C" 4 | { 5 | #endif 6 | 7 | #pragma omp target device(cuda) copy_deps ndrange( 1,n,128 ) 8 | #pragma omp task in([n]x) inout([n]y) 9 | __global__ void saxpy(int n, float a,float* x, float* y); 10 | 11 | #ifdef __cplusplus 12 | } 13 | #endif 14 | -------------------------------------------------------------------------------- /03-gpu-devices/saxpy-cuda/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=saxpy-p 2 | 3 | export NX_GPUMAXMEM=90 4 | 5 | for gpus in 1 2; do 6 | export NX_GPUS=$gpus 7 | ./$PROGRAM 8 | done 9 | 10 | -------------------------------------------------------------------------------- /03-gpu-devices/saxpy-cuda/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=saxpy-p 2 | 3 | export NX_GPUMAXMEM=90 4 | export NX_GPUS=2 #change this in order to use more GPUs 5 | 6 | ./$PROGRAM 7 | 8 | -------------------------------------------------------------------------------- /03-gpu-devices/saxpy-cuda/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=saxpy 2 | KERNEL=kernel 3 | PREFIX=. 4 | 5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 6 | 7 | JOB_SCHED_VERSION=-smp 8 | 9 | BASE_DIR=../.. 
10 | include $(BASE_DIR)/common-files/Makefile 11 | 12 | CC = mcc 13 | KC = nvcc 14 | 15 | CFLAGS = --ompss --cuda 16 | CFLAGS_P = 17 | CFLAGS_I = --instrument 18 | CFLAGS_D = --debug 19 | 20 | NVCFLAGS = -O3 21 | 22 | LIBS = 23 | INCS = -I$(PREFIX) 24 | 25 | EXTRA = -O3 -Wall -Wno-unused 26 | OBJECTS= 27 | 28 | $(PROGRAM)-p: $(PROGRAM)-p.o $(KERNEL).o $(OBJECTS) 29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS) 30 | 31 | $(PROGRAM)-i: $(PROGRAM)-i.o $(KERNEL).o $(OBJECTS) 32 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS) 33 | 34 | $(PROGRAM)-d: $(PROGRAM)-d.o $(KERNEL).o $(OBJECTS) 35 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS) 36 | 37 | 38 | $(PROGRAM)-p.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h 39 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ -c $< 40 | 41 | $(PROGRAM)-i.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h 42 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ -c $< 43 | 44 | $(PROGRAM)-d.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h 45 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ -c $< 46 | 47 | 48 | $(KERNEL).o: $(KERNEL).cu $(PREFIX)/$(KERNEL).h 49 | $(KC) $(INCS) -o $@ -c $< 50 | 51 | .c.o: 52 | $(CC) --no-openmp $(EXTRA) -c $< 53 | 54 | clean: 55 | rm -f $(CC)_* *.o *~ $(TARGETS) 56 | 57 | -------------------------------------------------------------------------------- /03-gpu-devices/saxpy-cuda/kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | __global__ void saxpy(int n, float a, float* x, float* y) 4 | { 5 | int i = blockIdx.x * blockDim.x + threadIdx.x; 6 | if(i < n) y[i] = a * x[i] + y[i]; 7 | } 8 | -------------------------------------------------------------------------------- /03-gpu-devices/saxpy-cuda/kernel.h: -------------------------------------------------------------------------------- 1 | #ifdef __cplusplus 2 | extern 
"C" 3 | { 4 | #endif 5 | 6 | #pragma omp target device(cuda) copy_deps ndrange(/*???*/) 7 | #pragma omp task in([n]x) inout([n]y) 8 | __global__ void saxpy(int n, float a,float* x, float* y); 9 | 10 | #ifdef __cplusplus 11 | } 12 | #endif 13 | -------------------------------------------------------------------------------- /03-gpu-devices/saxpy-cuda/saxpy.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #define N 1024*1024 6 | #define BS 64*1024 7 | 8 | int main(int argc, char* argv[]) 9 | { 10 | float a=5, *x, *y; 11 | int i; 12 | 13 | x = (float *) malloc(N*sizeof(float)); 14 | y = (float *) malloc(N*sizeof(float)); 15 | 16 | for (i=0; i 2 | 3 | __kernel void saxpy(int n, float a, 4 | __global float* x, __global float* y) { 5 | int i = get_global_id(0); 6 | if(i < n) 7 | y[i] = a * x[i] + y[i]; 8 | } 9 | 10 | -------------------------------------------------------------------------------- /03-gpu-devices/saxpy-opencl/kernel.h: -------------------------------------------------------------------------------- 1 | #pragma omp target device(opencl) copy_deps /* ndrange(???) */ 2 | #pragma omp task in([n]x) inout([n]y) 3 | __kernel void saxpy(int n, float a, 4 | __global float* x, __global float* y); 5 | -------------------------------------------------------------------------------- /03-gpu-devices/saxpy-opencl/saxpy.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define N 1024 4 | int main(int argc, char* argv[]) { 5 | float a, x[N], y[N]; 6 | a=5; 7 | int i; 8 | for (i=0; i> to your job script. 
15 | -------------------------------------------------------------------------------- /04-mpi+ompss/heat/heat.h: -------------------------------------------------------------------------------- 1 | /* 2 | * heat.h 3 | * 4 | * Global definitions for the iterative solver 5 | */ 6 | 7 | #include 8 | 9 | // configuration 10 | 11 | typedef struct 12 | { 13 | float posx; 14 | float posy; 15 | float range; 16 | float temp; 17 | } 18 | heatsrc_t; 19 | 20 | typedef struct 21 | { 22 | unsigned maxiter; // maximum number of iterations 23 | unsigned resolution; // spatial resolution 24 | int algorithm; // 0=>Jacobi, 1=>Gauss 25 | 26 | unsigned visres; // visualization resolution 27 | 28 | double *u, *uhelp; 29 | double *uvis; 30 | 31 | unsigned numsrcs; // number of heat sources 32 | heatsrc_t *heatsrcs; 33 | } 34 | algoparam_t; 35 | 36 | // function declarations 37 | 38 | // misc.c 39 | int initialize( algoparam_t *param ); 40 | int finalize( algoparam_t *param ); 41 | void write_image( FILE * f, double *u, 42 | unsigned sizex, unsigned sizey ); 43 | int coarsen(double *uold, unsigned oldx, unsigned oldy , 44 | double *unew, unsigned newx, unsigned newy ); 45 | int read_input( FILE *infile, algoparam_t *param ); 46 | void print_params( algoparam_t *param ); 47 | double wtime(); 48 | 49 | // solvers in solver.c 50 | double relax_redblack( double *u, 51 | unsigned sizex, unsigned sizey ); 52 | 53 | double relax_gauss( double *u, 54 | unsigned sizex, unsigned sizey ); 55 | 56 | double relax_jacobi( double *u, double *utmp, 57 | unsigned sizex, unsigned sizey ); 58 | 59 | -------------------------------------------------------------------------------- /04-mpi+ompss/heat/test.dat: -------------------------------------------------------------------------------- 1 | 10 # iterations (25000) 2 | 4096 # resolution 3 | 0 # Algorithm 0=Jacobi 1=RedBlack 2=GaussSeidel 4 | 2 # number of heat sources 5 | 0.0 0.0 1.0 2.5 # (x,y), size temperature 6 | 0.5 1.0 1.0 2.5 # 7 | 
-------------------------------------------------------------------------------- /04-mpi+ompss/matmul/.config/mm-image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bsc-pm/ompss-ee/edc3f13c8137bae7ca691b15fbd18b70d0765311/04-mpi+ompss/matmul/.config/mm-image.png -------------------------------------------------------------------------------- /04-mpi+ompss/matmul/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=matmul-p 2 | 3 | THREADS=(1 2 3 4 5 6) 4 | 5 | for thread in ${THREADS[@]}; do 6 | NX_SMP_WORKERS=$thread ${MPIRUN_COMMAND} ./$PROGRAM 7 | done 8 | -------------------------------------------------------------------------------- /04-mpi+ompss/matmul/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=matmul-p 2 | 3 | # Run with 6 threads per MPI process in the same node 4 | export NX_SMP_WORKERS=6 5 | 6 | # Uncomment to instrument 7 | #export INST=./graph.sh 8 | #export INST=./trace.sh 9 | 10 | ${MPIRUN_COMMAND} $INST ./$PROGRAM 11 | 12 | # Generate the trace if needed 13 | if [[ "$INST" == *"trace"* ]]; then 14 | mpi2prv -f TRACE.mpits -o myTrace.prv 15 | fi 16 | -------------------------------------------------------------------------------- /04-mpi+ompss/matmul/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM=matmul 2 | PREFIX=. 3 | 4 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 5 | 6 | JOB_SCHED_VERSION=-mpi 7 | TRACE_VERSION=-mpi 8 | 9 | BASE_DIR=../.. 10 | include $(BASE_DIR)/common-files/Makefile 11 | 12 | CC = smpcc 13 | 14 | CFLAGS = --ompss 15 | CFLAGS_P = 16 | CFLAGS_I = --instrument 17 | CFLAGS_D = --debug 18 | 19 | LIBS = -L$(ATLAS_LIB_DIR) -lcblas -latlas 20 | INCS = -I. 
-I$(ATLAS_INC_DIR) 21 | 22 | 23 | EXTRA = -std=c99 -O3 -Wall -Wno-unused 24 | SOURCES = matmul.c driver.c check.c prthead.c gendat.c prtspeed.c cclock.c layouts.c 25 | 26 | all: $(TARGETS) 27 | 28 | $(PROGRAM)-p: $(SOURCES) 29 | $(MPI_NATIVE_CC)=$(CC) \ 30 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 31 | 32 | $(PROGRAM)-i: $(SOURCES) 33 | $(MPI_NATIVE_CC)=$(CC) \ 34 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 35 | 36 | $(PROGRAM)-d: $(SOURCES) 37 | $(MPI_NATIVE_CC)=$(CC) \ 38 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 39 | 40 | clean: 41 | rm -f $(CC)_* *.o *~ $(TARGETS) 42 | 43 | -------------------------------------------------------------------------------- /04-mpi+ompss/matmul/bsize.h: -------------------------------------------------------------------------------- 1 | #define REAL double 2 | #define BSIZE 1024 3 | 4 | -------------------------------------------------------------------------------- /04-mpi+ompss/matmul/cclock.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | /* ------------------------------------------------------------------- 6 | 7 | This function returns the wall clock time with micro seconds 8 | accuracy. 9 | The data type of the returned value is "double". 10 | 11 | The function can be called from a FORTRAN module. The value 12 | returned by cclock_ and cclock should be of type REAL(Kind = 8). 
13 | 14 | ------------------------------------------------------------------- 15 | */ 16 | 17 | double cclock_( void ) 18 | { 19 | const double micro = 1.0e-06; /* Conversion constant */ 20 | static long start = 0L, startu; 21 | struct timeval tp; /* Structure used by gettimeofday */ 22 | double wall_time; /* To hold the result */ 23 | 24 | 25 | if ( gettimeofday( &tp, NULL) == -1 ) 26 | wall_time = -1.0e0; 27 | else if( !start ) { 28 | start = tp.tv_sec; 29 | startu = tp.tv_usec; 30 | wall_time = 0.0e0; 31 | } 32 | else 33 | wall_time = (double) (tp.tv_sec - start) + micro*(tp.tv_usec - startu); 34 | 35 | return wall_time; 36 | } 37 | 38 | 39 | double cclock( void ) 40 | { 41 | return cclock_(); 42 | } 43 | -------------------------------------------------------------------------------- /04-mpi+ompss/matmul/check.c: -------------------------------------------------------------------------------- 1 | #include "bsize.h" 2 | #include "matmul.h" 3 | #include 4 | #include 5 | #include 6 | 7 | //#pragma omp target device (smp) copy_deps // copy_in ([ts]pb) 8 | //#pragma omp task input(pb[0:ts-1]) concurrent (*ok) 9 | void check_block (int m, int n, double (*pb)[n], double value, double eps, int *ok) 10 | { 11 | int i, j; 12 | int lok=0; 13 | 14 | for(i=0;i eps ) { 17 | lok++; 18 | } 19 | } 20 | } 21 | 22 | if (lok >0) *ok+=lok; //does not matter if no mx 23 | } 24 | 25 | int check(int m, int n, double (*C)[n], double tvalue) 26 | { 27 | double eps; 28 | int i, j, ok = 0; 29 | 30 | eps = 2.0*m*m*DBL_EPSILON; 31 | 32 | for(i=0;i<1;i++){ 33 | for(j=0;j<1;j++){ 34 | check_block( m, n, C, tvalue, eps, &ok); 35 | } 36 | } 37 | 38 | return( ok ); 39 | } 40 | 41 | -------------------------------------------------------------------------------- /04-mpi+ompss/matmul/gendat.c: -------------------------------------------------------------------------------- 1 | #include "matmul.h" 2 | #include "layouts.h" 3 | #include 4 | 5 | 6 | //#pragma omp target device (smp) copy_deps 7 | 
//#pragma omp task output(A) 8 | void init_tile (int m, int n, double (*A)[n], double Value ) 9 | { 10 | int i, j; 11 | 12 | for( i = 0; i < m; ++i ) 13 | for( j = 0; j < n; ++j ) 14 | A[i][j] = Value; 15 | 16 | } 17 | 18 | void gendat(int m, int n, double (*A)[m], double (*B)[n], double (*C)[n]) 19 | { 20 | int i,j; 21 | double Value; 22 | 23 | for( i = 0; i < 1; ++i ) 24 | for( j = 0; j < 1; ++j ) { 25 | Value = 1.0; 26 | init_tile( n, m, A, Value); 27 | Value = 1.0; 28 | init_tile( m, n, B, Value); 29 | Value = 0.0; 30 | init_tile( m, n, C, Value); 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /04-mpi+ompss/matmul/matmul.c: -------------------------------------------------------------------------------- 1 | #include "bsize.h" 2 | #include "matmul.h" 3 | #include "layouts.h" 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | 13 | // MPI info. Global Variables. Invariant during whole execution 14 | extern int me; 15 | extern int nodes; 16 | 17 | void matmul ( int m, int n, double (*A)[m], double (*B)[n], double (*C)[n] ) 18 | { 19 | double (*a)[m]; 20 | double (*rbuf)[m]; 21 | double (*orig_rbuf)[m]; 22 | void *ptmp; 23 | int up, down; 24 | int i; 25 | int it; 26 | int tag = 1000; 27 | int size = m*n; 28 | MPI_Status stats; 29 | 30 | orig_rbuf = rbuf = (double (*)[m])malloc(m*n*sizeof(double)); 31 | if (nodes >1) { 32 | up = me0 ? 
me-1:nodes-1; 34 | } else { 35 | up = down = MPI_PROC_NULL; 36 | } 37 | 38 | a=A; 39 | i = n*me; // first C block (different for each process) 40 | size = m*n; 41 | 42 | 43 | for( it = 0; it < nodes; it++ ) { 44 | 45 | #pragma omp task in (a[0:n-1], B[0:m-1]) inout (C[i:i+n-1][0:n-1]) firstprivate (n,m) 46 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, m, 1.0, (double *)a, m, (double *)B, n, 1.0, (double *)&C[i][0], n); 47 | 48 | if (it < nodes-1) { 49 | #pragma omp task in (a[0:n-1]) out (rbuf[0:n-1]) inout(stats) firstprivate (size,m,n,tag,down,up) 50 | MPI_Sendrecv( a, size, MPI_DOUBLE, down, tag, rbuf, size, MPI_DOUBLE, up, tag, MPI_COMM_WORLD, &stats ); 51 | } 52 | 53 | i = (i+n)%m; //next C block circular 54 | ptmp=a; a=rbuf; rbuf=ptmp; //swap pointers 55 | } 56 | 57 | #pragma omp taskwait 58 | free (orig_rbuf); 59 | } 60 | -------------------------------------------------------------------------------- /04-mpi+ompss/matmul/matmul.h: -------------------------------------------------------------------------------- 1 | #ifndef __CUDACC__ 2 | 3 | void matmul ( int m, int n, double (*A)[m], double (*B)[n], double (*C)[n] ); 4 | 5 | #endif // __CUDACC__ 6 | 7 | #ifdef __cplusplus 8 | extern "C" { 9 | #endif 10 | 11 | #ifdef __CUDACC__ 12 | 13 | void dgemm_cublas (int BS, double *A, double *B, double *C); 14 | 15 | #else 16 | 17 | extern int BS; 18 | 19 | //#pragma omp target device (cuda) copy_deps 20 | //#pragma omp task input([n][m]A, [m][n]B) inout([m][n]C) 21 | //void dgemm_cublas (int m, int n, double *A, double *B, double *C); 22 | 23 | 24 | #endif 25 | 26 | #ifdef __cplusplus 27 | } 28 | #endif 29 | -------------------------------------------------------------------------------- /04-mpi+ompss/matmul/prthead.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void prthead( int nodes ) 4 | { 5 | printf( "matmul: Matrix-matrix multiply test C(m,n) = A(m,l)*B(l,n)\n" ); 6 | printf ("Number of 
MPI processes: %d\n", nodes); 7 | printf( "----------------------------------------------------------\n" ); 8 | printf( " Problem size | | | |\n" ); 9 | printf( " m | l | n | Time (s) | (Gflop/s) | OK? |\n" ); 10 | printf( "----------------------------------------------------------\n" ); 11 | fflush(stdout); 12 | 13 | } 14 | -------------------------------------------------------------------------------- /04-mpi+ompss/matmul/prtspeed.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define max(a,b)( ((a) > (b)) ? (a) : (b) ) 4 | 5 | void prtspeed( int m, int l, int n, double time, int ok, unsigned long nops ) 6 | { 7 | double speed; 8 | // ----------------------------------------------------------------- 9 | //speed = 1.0e-9*2*m*l*n/max( time, 1.0e-9 ); 10 | speed = 1.0e-9*nops/time; 11 | 12 | // printf( "%4d |%4d | %4d| %11.4lf | %11.4lf | ", m, l, n, time, speed ); 13 | printf( "%d\t%d\t%d\t%.4lf\t %.4lf ", m, l, n, time, speed ); 14 | if ( ok == 0 ) 15 | printf( " T |\n" ); 16 | else 17 | printf( " F (%d)|\n", ok ); 18 | // printf( "nops = %lu; m = %d; l = %d; n = %d\n", nops, m, l, n ); 19 | 20 | fflush(stdout); 21 | } 22 | -------------------------------------------------------------------------------- /04-mpi+ompss/matmul/test.in: -------------------------------------------------------------------------------- 1 | 1024 1024 2 2 | 2048 2048 1 3 | 2048 2048 1 4 | 4096 4096 1 5 | -------------------------------------------------------------------------------- /05-ompss+dlb/README.rst: -------------------------------------------------------------------------------- 1 | OmpSs+DLB Exercises 2 | ******************* 3 | 4 | .. 
toctree:: 5 | :maxdepth: 2 6 | :numbered: 7 | 8 | pils/README.rst 9 | lulesh/README.rst 10 | lub/README.rst 11 | pils-multiapp/README.rst 12 | -------------------------------------------------------------------------------- /05-ompss+dlb/lub/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=LUB-p 2 | 3 | # Uncomment to enable DLB 4 | # export NX_ARGS+=" --thread-manager=dlb" 5 | # export DLB_ARGS+=" --policy=auto_LeWI_mask --lend-mode=BLOCK" 6 | # export OMPSSEE_LD_PRELOAD=$DLB_HOME/lib/libdlb_mpi.so 7 | 8 | export NX_ARGS+=" --force-tie-master --warmup-threads" 9 | 10 | for i in $(seq 1 3) ; do 11 | mpirun env LD_PRELOAD=$OMPSSEE_LD_PRELOAD ./$PROGRAM 8000 100 | grep 'time to compute' 12 | done 13 | -------------------------------------------------------------------------------- /05-ompss+dlb/lub/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=LUB-i 2 | 3 | # Uncomment to instrument 4 | # export INST=./trace.sh 5 | 6 | # Uncomment to enable DLB 7 | # export NX_ARGS+=" --thread-manager=dlb" 8 | # export DLB_ARGS+=" --policy=auto_LeWI_mask --lend-mode=BLOCK" 9 | # export OMPSSEE_LD_PRELOAD=$DLB_HOME/lib/libdlb_mpi_instr.so 10 | 11 | export NX_ARGS+=" --force-tie-master --warmup-threads" 12 | 13 | mpirun $INST ./$PROGRAM 2000 100 14 | 15 | -------------------------------------------------------------------------------- /05-ompss+dlb/lub/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM = LUB 2 | PREFIX = . 3 | 4 | TARGETS = $(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 5 | 6 | JOB_SCHED_VERSION = -mpi 7 | 8 | BASE_DIR = ../.. 
9 | include $(BASE_DIR)/common-files/Makefile 10 | 11 | CC = smpcc 12 | 13 | CFLAGS = --ompss 14 | CFLAGS_P = 15 | CFLAGS_I = --instrument 16 | CFLAGS_D = --debug -DCHECK_RESULT 17 | 18 | LIBS = -lm 19 | INCS = 20 | 21 | EXTRA = 22 | 23 | RM = rm -f 24 | 25 | SOURCES = LUB.c 26 | 27 | all: $(TARGETS) 28 | 29 | $(PROGRAM)-p: $(SOURCES) 30 | $(MPI_NATIVE_CC)=$(CC) \ 31 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 32 | 33 | $(PROGRAM)-i: $(SOURCES) 34 | $(MPI_NATIVE_CC)=$(CC) \ 35 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 36 | 37 | $(PROGRAM)-d: $(SOURCES) 38 | $(MPI_NATIVE_CC)=$(CC) \ 39 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 40 | 41 | clean: 42 | $(RM) $(CC)_* *.o *~ $(TARGETS) 43 | -------------------------------------------------------------------------------- /05-ompss+dlb/lub/README.rst: -------------------------------------------------------------------------------- 1 | LUB 2 | --- 3 | 4 | .. highlight:: none 5 | 6 | LUB is an LU matrix decomposition by blocks 7 | 8 | Usage:: 9 | 10 | ./LUB 11 | 12 | **Goals of this exercise** 13 | 14 | * Run the instrumented version of LUB and analyse the Paraver trace. 15 | * Enable DLB options. Run and analyse the Paraver trace. 16 | * Run the multirun.sh script and compare the execution performance with and without DLB. 
17 | -------------------------------------------------------------------------------- /05-ompss+dlb/lulesh/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=lulesh2.0-p 2 | 3 | # Uncomment to enable DLB 4 | # export NX_ARGS+=" --thread-manager=dlb" 5 | # export DLB_ARGS+=" --policy=auto_LeWI_mask --lend-mode=BLOCK" 6 | # export OMPSSEE_LD_PRELOAD=$DLB_HOME/lib/libdlb_mpi.so 7 | # export I_MPI_WAIT_MODE=1 8 | 9 | export NX_ARGS+=" --force-tie-master --warmup-threads" 10 | 11 | for i in $(seq 1 3) ; do 12 | mpirun -n 27 env LD_PRELOAD=$OMPSSEE_LD_PRELOAD ./$PROGRAM -i 15 -b 8 -s 100 \ 13 | | tac | grep -m 1 'Elapsed time' 14 | done 15 | -------------------------------------------------------------------------------- /05-ompss+dlb/lulesh/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=lulesh2.0-i 2 | 3 | # Uncomment to instrument 4 | # export INST=./trace.sh 5 | 6 | # Uncomment to enable DLB 7 | # export NX_ARGS+=" --thread-manager=dlb" 8 | # export DLB_ARGS+=" --policy=auto_LeWI_mask --lend-mode=BLOCK" 9 | # export OMPSSEE_LD_PRELOAD=$DLB_HOME/lib/libdlb_mpi_instr.so 10 | # export I_MPI_WAIT_MODE=1 11 | 12 | export NX_ARGS+=" --force-tie-master --warmup-threads" 13 | 14 | mpirun -n 27 $INST ./$PROGRAM -i 5 -b 8 -s 100 15 | -------------------------------------------------------------------------------- /05-ompss+dlb/lulesh/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM = lulesh2.0 2 | PREFIX = . 3 | 4 | TARGETS = $(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 5 | 6 | JOB_SCHED_VERSION = -mpi 7 | 8 | BASE_DIR = ../.. 
9 | include $(BASE_DIR)/common-files/Makefile 10 | 11 | CXX = smpcxx 12 | 13 | CFLAGS = --ompss 14 | CFLAGS_P = 15 | CFLAGS_I = --instrument 16 | CFLAGS_D = --debug 17 | 18 | LIBS = -lm 19 | INCS = 20 | 21 | EXTRA = -DUSE_MPI=1 22 | 23 | RM = rm -f 24 | 25 | SOURCES = lulesh.cc lulesh-comm.cc lulesh-viz.cc lulesh-util.cc lulesh-init.cc 26 | 27 | all: $(TARGETS) 28 | 29 | $(PROGRAM)-p: $(SOURCES) 30 | $(MPI_NATIVE_CXX)=$(CXX) \ 31 | $(MPICXX) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 32 | 33 | $(PROGRAM)-i: $(SOURCES) 34 | $(MPI_NATIVE_CXX)=$(CXX) \ 35 | $(MPICXX) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 36 | 37 | $(PROGRAM)-d: $(SOURCES) 38 | $(MPI_NATIVE_CXX)=$(CXX) \ 39 | $(MPICXX) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 40 | 41 | clean: 42 | $(RM) $(CC)_* *.o *~ $(TARGETS) 43 | -------------------------------------------------------------------------------- /05-ompss+dlb/lulesh/README.rst: -------------------------------------------------------------------------------- 1 | Lulesh 2 | ------ 3 | 4 | .. highlight:: none 5 | 6 | Lulesh is a benchmark from LLNL, it represents a typical hydrocode like ALE3D. 7 | 8 | Usage:: 9 | 10 | ./lulesh2.0 -i -b -s 11 | 12 | 13 | **Goals of this exercise** 14 | 15 | * Run the instrumented version of Lulesh and analyse the Paraver trace. 16 | * Enable DLB options, MPI interception included. Run and analyse the Paraver trace. 17 | * Run the multirun.sh script and compare the execution performance with and without DLB. 
18 | -------------------------------------------------------------------------------- /05-ompss+dlb/pils-multiapp/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=ompss_pils-p 2 | -------------------------------------------------------------------------------- /05-ompss+dlb/pils-multiapp/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=ompss_pils-i 2 | 3 | # Uncomment to instrument 4 | # export INST=./trace-multiapp.sh 5 | 6 | # Uncomment to enable DLB 7 | # export NX_ARGS+=" --thread-manager=dlb" 8 | # export DLB_ARGS+=" --policy=auto_LeWI_mask" 9 | 10 | export NX_ARGS+=" --warmup-threads" 11 | 12 | export TRACEID=TRACE1 13 | taskset -c 0-7 $INST ./$PROGRAM input1 1 100 500 & 14 | 15 | export TRACEID=TRACE2 16 | taskset -c 8-15 $INST ./$PROGRAM input2 1 100 50 & 17 | 18 | wait 19 | 20 | if [[ -n "$INST" ]] ; then 21 | mpi2prv -f TRACE1.mpits -- -f TRACE2.mpits -o myTrace.prv 22 | fi 23 | -------------------------------------------------------------------------------- /05-ompss+dlb/pils-multiapp/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM = ompss_pils 2 | PREFIX = . 3 | 4 | TARGETS = $(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 5 | 6 | JOB_SCHED_VERSION = -smp 7 | 8 | BASE_DIR = ../.. 
9 | include $(BASE_DIR)/common-files/Makefile 10 | 11 | CC = smpcc 12 | 13 | CFLAGS = --ompss 14 | CFLAGS_P = 15 | CFLAGS_I = --instrument 16 | CFLAGS_D = --debug 17 | 18 | LIBS = 19 | INCS = 20 | 21 | EXTRA = -std=c99 -Wall -Wno-unused 22 | 23 | RM = rm -f 24 | 25 | SOURCES = ompss_pils.c 26 | 27 | all: $(TARGETS) 28 | 29 | $(PROGRAM)-p: $(SOURCES) 30 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 31 | 32 | $(PROGRAM)-i: $(SOURCES) 33 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 34 | 35 | $(PROGRAM)-d: $(SOURCES) 36 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 37 | 38 | clean: 39 | $(RM) $(CC)_* *.o *~ $(TARGETS) 40 | -------------------------------------------------------------------------------- /05-ompss+dlb/pils-multiapp/README.rst: -------------------------------------------------------------------------------- 1 | PILS - multiapp example 2 | ----------------------- 3 | 4 | .. highlight:: c 5 | 6 | This example demonstrates the capabilities of DLB sharing resources with two different 7 | unrelated applications. The run-once.sh script executes two instances of PILS without 8 | MPI support, each one in a different set of CPUs. DLB is able to automatically lend 9 | resources from one to another. 10 | 11 | **Goals of this exercise** 12 | 13 | * Run the script run-once.sh with tracing and DLB enabled, and observe how two 14 | unrelated applications share resources. 
15 | -------------------------------------------------------------------------------- /05-ompss+dlb/pils-multiapp/extrae-multiapp.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 1-3 18 | 1-5 19 | 1-3 20 | 21 | 22 | 23 | $TRACEID$ 24 | 5 25 | 26 | 27 | 28 | 29 | 30 | 31 | 5000000 32 | 33 | 34 | 35 | 36 | 37 | 38 | PAPI_TOT_INS,PAPI_TOT_CYC,PAPI_L1_DCM 39 | PAPI_TOT_CYC 40 | 41 | 42 | PAPI_TOT_INS,PAPI_FP_INS,PAPI_TOT_CYC 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 56 | my_trace.prv 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /05-ompss+dlb/pils-multiapp/input1: -------------------------------------------------------------------------------- 1 | 20 2 | -------------------------------------------------------------------------------- /05-ompss+dlb/pils-multiapp/input2: -------------------------------------------------------------------------------- 1 | 200 2 | -------------------------------------------------------------------------------- /05-ompss+dlb/pils-multiapp/trace-multiapp.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export EXTRAE_CONFIG_FILE=extrae-multiapp.xml 4 | export NX_INSTRUMENTATION=extrae 5 | 6 | $* 7 | -------------------------------------------------------------------------------- /05-ompss+dlb/pils/.config/multirun.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=mpi_ompss_pils-p 2 | 3 | # Uncomment to enable DLB 4 | # export NX_ARGS+=" --thread-manager=dlb" 5 | # export DLB_ARGS+=" --policy=auto_LeWI_mask" 6 | 7 | export NX_ARGS+=" --force-tie-master --warmup-threads" 8 | 9 | for i in $(seq 1 3) ; do 10 | mpirun ./$PROGRAM /dev/null 1 10 500 | grep 'Application time' 11 | done 12 | -------------------------------------------------------------------------------- 
/05-ompss+dlb/pils/.config/run-once.sh: -------------------------------------------------------------------------------- 1 | PROGRAM=mpi_ompss_pils-i 2 | 3 | # Uncomment to instrument 4 | # export INST=./trace.sh 5 | 6 | # Uncomment to enable DLB 7 | # export NX_ARGS+=" --thread-manager=dlb" 8 | # export DLB_ARGS+=" --policy=auto_LeWI_mask" 9 | 10 | # Uncomment to enable DLB MPI interception 11 | # export OMPSSEE_LD_PRELOAD=$DLB_HOME/lib/libdlb_mpi_instr.so 12 | 13 | export NX_ARGS+=" --force-tie-master --warmup-threads" 14 | 15 | mpirun $INST ./$PROGRAM /dev/null 1 5 500 16 | -------------------------------------------------------------------------------- /05-ompss+dlb/pils/Makefile: -------------------------------------------------------------------------------- 1 | PROGRAM = mpi_ompss_pils 2 | PREFIX = . 3 | 4 | TARGETS = $(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d 5 | 6 | JOB_SCHED_VERSION = -mpi 7 | 8 | BASE_DIR = ../.. 9 | include $(BASE_DIR)/common-files/Makefile 10 | 11 | CC = smpcc 12 | 13 | CFLAGS = --ompss 14 | CFLAGS_P = 15 | CFLAGS_I = --instrument 16 | CFLAGS_D = --debug 17 | 18 | LIBS = -lm 19 | INCS = 20 | 21 | EXTRA = -std=c99 -Wall -Wno-unused 22 | 23 | RM = rm -f 24 | 25 | SOURCES = mpi_ompss_pils.c 26 | 27 | all: $(TARGETS) 28 | 29 | $(PROGRAM)-p: $(SOURCES) 30 | $(MPI_NATIVE_CC)=$(CC) \ 31 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 32 | 33 | $(PROGRAM)-i: $(SOURCES) 34 | $(MPI_NATIVE_CC)=$(CC) \ 35 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 36 | 37 | $(PROGRAM)-d: $(SOURCES) 38 | $(MPI_NATIVE_CC)=$(CC) \ 39 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $^ $(LIBS) 40 | 41 | clean: 42 | $(RM) $(CC)_* *.o *~ $(TARGETS) 43 | -------------------------------------------------------------------------------- /05-ompss+dlb/pils/README.rst: -------------------------------------------------------------------------------- 1 | PILS (Parallel ImbaLance Simulator) 2 | 
----------------------------------- 3 | 4 | .. highlight:: none 5 | 6 | PILS is an MPI+OpenMP/OmpSs synthetic benchmark that measures the execution time 7 | of imbalanced MPI ranks. 8 | 9 | Usage:: 10 | 11 | ./mpi_ompss_pils 12 | loads-file: file with load balance (number of tasks per iteration) per process, [100, 250] if /dev/null 13 | parallel-grain: parallelism grain, factor between 0..1 to apply sub-blocking techniques 14 | loops: number of execution loops 15 | task_size: factor to increase task size 16 | 17 | **Goals of this exercise** 18 | 19 | * Run the instrumented version of PILS and generate a Paraver trace. 20 | 21 | * Analyse the load imbalance between MPI ranks. 22 | 23 | * Enable DLB and compare both executions. 24 | 25 | * Observe the dynamic thread creation when other processes suffer load imbalance. 26 | * Analyse the load imbalance of the new execution. Does it improve? 27 | 28 | * Enable DLB MPI interception and trace again. Analyse the new trace. 29 | * Run the multirun.sh script and compare the execution performance with and without DLB. 30 | * Modify the inputs of PILS to reduce load imbalance and see when DLB stops improving performance. 31 | 32 | -------------------------------------------------------------------------------- /common-files/Makefile: -------------------------------------------------------------------------------- 1 | COMMON_DIR=$(BASE_DIR)/common-files 2 | CONFIG_DIR=.config 3 | 4 | GFLAGS= 5 | 6 | PARAVER=extrae.xml trace.sh 7 | GRAPH=graph.sh 8 | SCRIPTS=run-once.sh multirun.sh 9 | 10 | MPI_CHECK=$(MPI_LIB_DIR)/libmpi.so 11 | MKL_CHECK=$(MKL_LIB_DIR)/libmkl_sequential.so 12 | ATLAS_CHECK=$(ATLAS_LIB_DIR)/libatlas.a 13 | 14 | all: $(TARGETS) $(SCRIPTS) $(PARAVER) $(GRAPH) 15 | 16 | extrae.xml: 17 | cp $(COMMON_DIR)/extrae.xml . 18 | 19 | trace.sh: 20 | cp $(COMMON_DIR)/trace$(TRACE_VERSION).sh trace.sh 21 | 22 | graph.sh: 23 | cp $(COMMON_DIR)/graph.sh . 
24 | 25 | run-once.sh: $(COMMON_DIR)/run-once.sh $(CONFIG_DIR)/run-once.sh 26 | cp $(COMMON_DIR)/run-once.sh . 27 | cat $(COMMON_DIR)/sched-job$(JOB_SCHED_VERSION) >> run-once.sh 28 | cat $(CONFIG_DIR)/run-once.sh >> run-once.sh 29 | 30 | multirun.sh: $(COMMON_DIR)/multirun.sh $(CONFIG_DIR)/multirun.sh 31 | cp $(COMMON_DIR)/multirun.sh . 32 | cat $(COMMON_DIR)/sched-job$(JOB_SCHED_VERSION) >> multirun.sh 33 | cat $(CONFIG_DIR)/multirun.sh >> multirun.sh 34 | 35 | $(MPI_CHECK): 36 | @echo "==================================================" 37 | @echo "=== MPI LIBRARY NEEDED ===" 38 | @echo "==================================================" 39 | @false 40 | 41 | $(MKL_CHECK): 42 | @echo "==================================================" 43 | @echo "=== MKL LIBRARY NEEDED ===" 44 | @echo "==================================================" 45 | @false 46 | 47 | $(ATLAS_CHECK): 48 | @echo "==================================================" 49 | @echo "=== ATLAS LIBRARY NEEDED ===" 50 | @echo "==================================================" 51 | @false 52 | 53 | wipe: clean 54 | rm -f $(PARAVER) 55 | rm -f $(SCRIPTS) 56 | rm -f *.out *.err 57 | rm -f *.prv *.row *.pcf 58 | rm -f TRACE.mpits 59 | rm -f -r set-0 60 | rm -f graph.dot graph.pdf 61 | -------------------------------------------------------------------------------- /common-files/configure_VirtualBox: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIRNAME=$(readlink -f $(dirname ${BASH_SOURCE[0]})) 4 | 5 | export OMPSS_HOME=/home/user/Builds/OmpSs/mcxx 6 | export EXTRAE_HOME=/home/user/Builds/extrae 7 | export PARAVER_HOME=/home/user/Tools/paraver 8 | export TEMANEJO_HOME=/home/user/Builds/temanejo 9 | export MPI_HOME=/usr/lib/openmpi 10 | export MPI_LIB_DIR=$MPI_HOME/lib 11 | export MPI_INC_DIR=$MPI_HOME/include 12 | export MPI_CC=mpicc 13 | export MPIRUN_COMMAND="mpirun" 14 | export MKL_LIB_DIR=/home/user/Builds/mkl/lib/intel64 15 | export 
MKL_INC_DIR=/home/user/Builds/mkl/include 16 | export ATLAS_LIB_DIR=/usr/lib 17 | export ATLAS_INC_DIR=/gpfs/apps/NVIDIA/ATLAS/3.9.51/include 18 | 19 | touch $DIRNAME/sched-job-smp 20 | touch $DIRNAME/sched-job-mpi 21 | -------------------------------------------------------------------------------- /common-files/configure_default: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIRNAME=$(readlink -f $(dirname ${BASH_SOURCE[0]})) 4 | 5 | echo \ \ Using default configuration file: 6 | echo \ \ $BASH_SOURCE 7 | 8 | export OMPSS_HOME= 9 | export EXTRAE_HOME= 10 | export PARAVER_HOME= 11 | export TEMANEJO_HOME= 12 | export MPI_LIB_DIR= 13 | export MPI_INC_DIR= 14 | export MPIRUN_COMMAND="mpirun" 15 | export MKL_LIB_DIR= 16 | export MKL_INC_DIR= 17 | export ATLAS_LIB_DIR=/home/xteruel/Applications/atlas-3.10.3/lib/ 18 | export ATLAS_INC_DIR= 19 | 20 | touch $DIRNAME/sched-job-smp 21 | touch $DIRNAME/sched-job-mpi 22 | -------------------------------------------------------------------------------- /common-files/configure_mn4: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIRNAME=$(readlink -f $(dirname ${BASH_SOURCE[0]})) 4 | 5 | export OMPSS_HOME=/apps/PM/ompss/git 6 | export DLB_HOME=/apps/PM/dlb/latest/impi 7 | export EXTRAE_HOME=/apps/BSCTOOLS/extrae/latest/impi_2017_4 8 | export PARAVER_HOME=/apps/BSCTOOLS/wxparaver/latest 9 | export TEMANEJO_HOME= 10 | export MPI_HOME=/apps/INTEL/2017.4/impi/2017.3.196 11 | export MPI_LIB_DIR=$MPI_HOME/lib 12 | export MPI_INC_DIR=$MPI_HOME/include 13 | export MPICC=mpiicc 14 | export MPICXX=mpiicpc 15 | export MPI_NATIVE_CC=I_MPI_CC 16 | export MPI_NATIVE_CXX=I_MPI_CXX 17 | export MPIRUN_COMMAND="mpirun" 18 | export MKL_LIB_DIR=/apps/INTEL/2017.4/mkl/lib/intel64 19 | export MKL_INC_DIR=/apps/INTEL/2017.4/mkl/include 20 | export ATLAS_LIB_DIR=/apps/ATLAS/3.10.3/INTEL_BK/lib 21 | export 
ATLAS_INC_DIR=/apps/ATLAS/3.10.3/INTEL_BK/include 22 | 23 | ln -sf $DIRNAME/sched-job-smp_mn4 $DIRNAME/sched-job-smp 24 | ln -sf $DIRNAME/sched-job-mpi_mn4 $DIRNAME/sched-job-mpi 25 | 26 | module unload openmpi 27 | module load impi/2017.4 28 | -------------------------------------------------------------------------------- /common-files/configure_nord3: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIRNAME=$(readlink -f $(dirname ${BASH_SOURCE[0]})) 4 | 5 | export OMPSS_HOME=/apps/PM/ompss/git 6 | export DLB_HOME=/apps/PM/dlb/latest/impi 7 | export EXTRAE_HOME=/apps/BSCTOOLS/extrae/latest/impi_5_1_3_210 8 | export PARAVER_HOME=/apps/BSCTOOLS/wxparaver/latest 9 | export TEMANEJO_HOME= 10 | export MPI_HOME=/apps/INTEL/2016.3.067/impi/5.1.3.210/intel64 11 | export MPI_LIB_DIR=$MPI_HOME/lib 12 | export MPI_INC_DIR=$MPI_HOME/include 13 | export MPICC=mpiicc 14 | export MPICXX=mpiicpc 15 | export MPI_NATIVE_CC=I_MPI_CC 16 | export MPI_NATIVE_CXX=I_MPI_CXX 17 | export MPIRUN_COMMAND="mpirun" 18 | 19 | # Note (vlopez); wxparaver needs glibcxx from gcc >= 5 20 | module load gcc/5.1.0 21 | module unload openmpi 22 | module load impi/5.1.3.210 23 | # Note (gmiranda): if you don't do this, mpiicc can't find icc. Fixme! 
24 | module load intel/16.0.0 25 | export MKL_LIB_DIR=/opt/intel/mkl/lib/intel64/ 26 | export MKL_INC_DIR=/opt/intel/mkl/include/ 27 | export ATLAS_LIB_DIR=/apps/ATLAS/3.10.2/lib 28 | export ATLAS_INC_DIR=/apps/ATLAS/3.10.2/include 29 | 30 | ln -sf $DIRNAME/sched-job-smp_nord3 $DIRNAME/sched-job-smp 31 | ln -sf $DIRNAME/sched-job-mpi_nord3 $DIRNAME/sched-job-mpi 32 | 33 | # Python configuration (needed by Temanejo) 34 | module load python 35 | -------------------------------------------------------------------------------- /common-files/configure_nvidia: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | DIRNAME=$(readlink -f $(dirname ${BASH_SOURCE[0]})) 4 | 5 | export OMPSS_HOME=/apps/PM/ompss/19.06/ 6 | export DLB_HOME=/apps/PM/dlb/latest/bullxmpi 7 | export EXTRAE_HOME=/apps/BSCTOOLS/extrae/latest/impi_2017_1_132 8 | export PARAVER_HOME=/apps/BSCTOOLS/wxparaver/latest 9 | export TEMANEJO_HOME= 10 | export MPI_HOME=/apps/INTEL/2017.1-043/impi/2017.1.132 11 | export MPI_LIB_DIR=$MPI_HOME/lib64 12 | export MPI_INC_DIR=$MPI_HOME/include64 13 | export MPICC=mpiicc 14 | export MPICXX=mpiicpc 15 | export MPI_NATIVE_CC=I_MPI_CC 16 | export MPI_NATIVE_CXX=I_MPI_CXX 17 | export MPIRUN_COMMAND="srun --cpu_bind=cores" 18 | export MKL_LIB_DIR=/opt/compilers/intel/2016.3.067/mkl/lib/intel64/ 19 | export MKL_INC_DIR=/opt/compilers/intel/2016.3.067/mkl/include 20 | export ATLAS_LIB_DIR=/gpfs/apps/NVIDIA/ATLAS/3.9.51/lib 21 | export ATLAS_INC_DIR=/gpfs/apps/NVIDIA/ATLAS/3.9.51/include/ 22 | 23 | ln -sf $DIRNAME/sched-job-smp_nvidia $DIRNAME/sched-job-smp 24 | ln -sf $DIRNAME/sched-job-mpi_nvidia $DIRNAME/sched-job-mpi 25 | 26 | module unload gcc 27 | module load gcc/4.6.1 28 | module load GRAPHVIZ 29 | module unload bullxmpi 30 | module load impi/2017.1 31 | module unload cuda 32 | module load cuda/8.0 33 | 34 | alias submit=mnsubmit 35 | alias queue=mnq 36 | 
-------------------------------------------------------------------------------- /common-files/extrae.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 1-3 18 | 1-5 19 | 1-3 20 | 21 | 22 | 23 | TRACE 24 | 5 25 | 26 | 27 | 28 | 29 | 30 | 31 | 5000000 32 | 33 | 34 | 35 | 36 | 37 | 38 | PAPI_TOT_INS,PAPI_TOT_CYC,PAPI_L1_DCM 39 | PAPI_TOT_CYC 40 | 41 | 42 | PAPI_TOT_INS,PAPI_FP_INS,PAPI_TOT_CYC 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 56 | my_trace.prv 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /common-files/graph.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Instrumentation to generate a task dependency graph 4 | export NX_INSTRUMENTATION=tdg 5 | 6 | $* 7 | -------------------------------------------------------------------------------- /common-files/multirun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | -------------------------------------------------------------------------------- /common-files/paraver: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | module load paraver 4 | wxparaver $* 5 | -------------------------------------------------------------------------------- /common-files/run-once.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | -------------------------------------------------------------------------------- /common-files/sched-job-mpi_mn4: -------------------------------------------------------------------------------- 1 | #SBATCH --job-name=ompss-ee 2 | #SBATCH --workdir=. 
3 | #SBATCH --output=ompss-ee_%j.out 4 | #SBATCH --error=ompss-ee_%j.err 5 | #SBATCH --cpus-per-task=24 6 | #SBATCH --ntasks=2 7 | #SBATCH --time=00:15:00 8 | #SBATCH --qos=debug 9 | -------------------------------------------------------------------------------- /common-files/sched-job-mpi_nord3: -------------------------------------------------------------------------------- 1 | #BSUB -n 16 2 | #BSUB -R "span[ptile=8]" 3 | #BSUB -oo ompss-ee_%J.out 4 | #BSUB -eo ompss-ee_%J.err 5 | ##BSUB -U patc5 6 | #BSUB -J ompss-ee 7 | #BSUB -W 00:15 8 | #BSUB -x 9 | 10 | -------------------------------------------------------------------------------- /common-files/sched-job-mpi_nvidia: -------------------------------------------------------------------------------- 1 | # @ job_name = ompss-ee 2 | # @ partition = debug 3 | ## @ reservation = 4 | # @ initialdir = . 5 | # @ output = ompss-ee_%j.out 6 | # @ error = ompss-ee_%j.err 7 | # @ total_tasks = 2 8 | # @ gpus_per_node = 2 9 | # @ cpus_per_task = 6 10 | # @ node_usage = not_shared 11 | # @ features = k80 12 | # @ wall_clock_limit = 00:15:00 13 | -------------------------------------------------------------------------------- /common-files/sched-job-smp_mn4: -------------------------------------------------------------------------------- 1 | #SBATCH --job-name=ompss-ee 2 | #SBATCH --workdir=. 
3 | #SBATCH --output=ompss-ee_%j.out 4 | #SBATCH --error=ompss-ee_%j.err 5 | #SBATCH --cpus-per-task=48 6 | #SBATCH --ntasks=1 7 | #SBATCH --time=00:15:00 8 | #SBATCH --qos=debug 9 | -------------------------------------------------------------------------------- /common-files/sched-job-smp_nord3: -------------------------------------------------------------------------------- 1 | #BSUB -n 16 2 | #BSUB -R "span[ptile=16]" 3 | #BSUB -oo ompss-ee_%J.out 4 | #BSUB -eo ompss-ee_%J.err 5 | ##BSUB -U patc5 6 | #BSUB -J ompss-ee 7 | #BSUB -W 00:15 8 | #BSUB -x 9 | 10 | -------------------------------------------------------------------------------- /common-files/sched-job-smp_nvidia: -------------------------------------------------------------------------------- 1 | # @ job_name = ompss-ee 2 | # @ partition = debug 3 | ## @ reservation = 4 | # @ initialdir = . 5 | # @ output = ompss-ee_%j.out 6 | # @ error = ompss-ee_%j.err 7 | # @ total_tasks = 1 8 | # @ gpus_per_node = 2 9 | # @ cpus_per_task = 12 10 | # @ node_usage = not_shared 11 | # @ features = k80 12 | # @ wall_clock_limit = 00:15:00 13 | -------------------------------------------------------------------------------- /common-files/trace-mpi.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Uncomment the following line to trace MPI+OmpSs programs 4 | export LD_PRELOAD=${EXTRAE_HOME}/lib/libnanosmpitrace.so 5 | 6 | # Uncomment the following line to trace MPI+OpenMP (GNU) programs 7 | #export LD_PRELOAD=${EXTRAE_HOME}/lib/libompitrace.so 8 | 9 | export LD_PRELOAD="$LD_PRELOAD:$OMPSSEE_LD_PRELOAD" 10 | 11 | export EXTRAE_CONFIG_FILE=extrae.xml 12 | export NX_INSTRUMENTATION=extrae 13 | 14 | $* 15 | 16 | -------------------------------------------------------------------------------- /common-files/trace.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Uncomment the following line to trace 
MPI+OmpSs programs 4 | #export LD_PRELOAD=${EXTRAE_HOME}/lib/libnanosmpitrace.so 5 | 6 | # Uncomment the following line to trace MPI+OpenMP (GNU) programs 7 | #export LD_PRELOAD=${EXTRAE_HOME}/lib/libompitrace.so 8 | 9 | export LD_PRELOAD="$LD_PRELOAD:$OMPSSEE_LD_PRELOAD" 10 | 11 | export EXTRAE_CONFIG_FILE=extrae.xml 12 | export NX_INSTRUMENTATION=extrae 13 | 14 | $* 15 | 16 | mpi2prv -f TRACE.mpits -o myTrace.prv 17 | -------------------------------------------------------------------------------- /paraver-cfgs/cluster/network_transfers_and_bw.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Network transfers and Recv Bandwidth (MB/s) > 7 | ################################################################################ 8 | window_name Network transfers and Recv Bandwidth (MB/s) 9 | window_type single 10 | window_id 1 11 | window_position_x 1440 12 | window_position_y 362 13 | window_width 838 14 | window_height 307 15 | window_comm_lines_enabled true 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_color_mode window_in_null_gradient_mode 19 | window_logical_filtered true 20 | window_physical_filtered false 21 | window_comm_fromto true 22 | window_comm_tagsize true 23 | window_comm_typeval true 24 | window_units Microseconds 25 | window_maximum_y 5080.783383360959 26 | window_minimum_y 0.504376358863 27 | window_compute_y_max false 28 | window_level thread 29 | window_scale_relative 1.000000000000 30 | window_end_time_relative 1.000000000000 31 | window_object appl { 1, { All } } 32 | window_begin_time_relative 0.000000000000 33 | window_open true 34 | window_drawmode 1 35 | window_drawmode_rows 1 36 | window_pixel_size 1 37 | window_labels_to_draw 1 38 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, 
{task, Adding}, {thread, Recv BandWidth}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 39 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 40 | window_semantic_module thread Recv BandWidth { 1, { 1 1.000000000000 } } 41 | window_filter_module tag_msg 1 3 42 | window_filter_module evt_type 1 9200011 43 | 44 | -------------------------------------------------------------------------------- /paraver-cfgs/general/sanity_checks/flushing.cfg: -------------------------------------------------------------------------------- 1 | version 3.3 2 | number_of_windows 1 3 | begin_description 4 | Dark blue indicates that OMPItrace was flushing the trace buffer to disk. This may result in a strong perturbation of the trace. 
5 | end_description 6 | 7 | ################################################################################ 8 | < NEW DISPLAYING WINDOW Flushing > 9 | ################################################################################ 10 | window_name Flushing 11 | window_type single 12 | window_id 1 13 | window_position_x 275 14 | window_position_y 267 15 | window_width 600 16 | window_height 115 17 | window_comm_lines_enabled false 18 | window_flags_enabled true 19 | window_maximum_y 34.000000 20 | window_scale_relative 1.000000 21 | window_object appl { 1, { All } } 22 | window_begin_time_relative 0.000000000000 23 | window_pos_to_disp 598 24 | window_pos_of_x_scale 18 25 | window_pos_of_y_scale 85 26 | window_number_of_row 128 27 | window_click_options 0 0 1 0 0 0 28 | window_click_info 0 412653 412845 0 412749 29 | window_expanded false 30 | window_open false 31 | window_drawmode 1 32 | window_drawmode_rows 1 33 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 34 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 35 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 36 | window_compose_functions { 2, { {compose1, As Is}, {compose2, As Is} } } 37 | window_semantic_module compose1 Prod { 1, { 1 1.000000 } } 38 | window_semantic_module compose1 Is In Range { 2, { 1 32.000000, 1 32.000000 } } 39 | window_semantic_module compose2 Prod { 1, { 1 1.000000 } } 40 | window_analyzer_executed 1 41 | window_analyzer_info 0.000000 31461579357.000000 1 128 42 | window_filter_module evt_type 1 40000003 43 | -------------------------------------------------------------------------------- /paraver-cfgs/general/views/instantaneous_parallelism.cfg: 
-------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | begin_description 4 | Instantaneous parallelism 5 | 6 | 7 | end_description 8 | 9 | ################################################################################ 10 | < NEW DISPLAYING WINDOW Parallelism profile > 11 | ################################################################################ 12 | window_name Parallelism profile 13 | window_type single 14 | window_id 1 15 | window_position_x 629 16 | window_position_y 337 17 | window_width 600 18 | window_height 147 19 | window_comm_lines_enabled false 20 | window_noncolor_mode false 21 | window_maximum_y 512.000000 22 | window_minimum_y 1.000000 23 | window_level appl 24 | window_scale_relative 1.000000 25 | window_object appl { 1, { 1 } } 26 | window_begin_time_relative 0.000000000000 27 | window_pos_to_disp 597 28 | window_pos_of_x_scale 18 29 | window_pos_of_y_scale 135 30 | window_number_of_row 1 31 | window_click_options 1 0 1 1 1 0 32 | window_click_info 0 1536415172456 1538138010346 35 1537276591401 33 | window_expanded false 34 | window_open false 35 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Useful}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } } 36 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 37 | window_analyzer_executed 0 38 | window_analyzer_info 0.000000 0.000000 0 0 39 | -------------------------------------------------------------------------------- /paraver-cfgs/general/views/not_useful.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | 
number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW Not Useful > 6 | ################################################################################ 7 | window_name Not Useful 8 | window_type single 9 | window_id 1 10 | window_position_x 387 11 | window_position_y 287 12 | window_width 600 13 | window_height 147 14 | window_comm_lines_enabled false 15 | window_scale_relative 0.955252 16 | window_object appl { 1, { All } } 17 | window_begin_time_relative 0.000000000000 18 | window_pos_to_disp 598 19 | window_pos_of_x_scale 18 20 | window_pos_of_y_scale 80 21 | window_number_of_row 16 22 | window_click_options 1 0 1 1 1 0 23 | window_click_info 0 1536415172456 1538138010346 35 1537276591401 24 | window_expanded false 25 | window_open false 26 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Useful}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } } 27 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, 1-Sign}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 28 | window_analyzer_executed 1 29 | window_analyzer_info 947824158.542278 64722849683.315544 1 16 30 | -------------------------------------------------------------------------------- /paraver-cfgs/general/views/one.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW One > 6 | ################################################################################ 7 | window_name One 8 | window_type single 9 | window_id 1 10 | window_position_x 
390 11 | window_position_y 52 12 | window_width 600 13 | window_height 147 14 | window_comm_lines_enabled false 15 | window_maximum_y 16.000000 16 | window_minimum_y 1.000000 17 | window_scale_relative 1.000001 18 | window_object appl { 1, { All } } 19 | window_begin_time_relative 0.000000000000 20 | window_pos_to_disp 597 21 | window_pos_of_x_scale 18 22 | window_pos_of_y_scale 80 23 | window_number_of_row 32 24 | window_click_options 1 0 1 1 1 0 25 | window_click_info 1 1536415172456 1538138010346 35 1537276591401 26 | window_expanded false 27 | window_open false 28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Thread ID}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } } 29 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Sign}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 30 | window_analyzer_executed 0 31 | window_analyzer_info 0.000000 0.000000 0 0 32 | -------------------------------------------------------------------------------- /paraver-cfgs/general/views/useful.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW Useful > 6 | ################################################################################ 7 | window_name Useful 8 | window_type single 9 | window_id 1 10 | window_position_x 416 11 | window_position_y 96 12 | window_width 600 13 | window_height 147 14 | window_comm_lines_enabled false 15 | window_scale_relative 1.028112 16 | window_object appl { 1, { All } } 17 | window_begin_time_relative 0.000000000000 18 | window_pos_to_disp 583 19 | 
window_pos_of_x_scale 18 20 | window_pos_of_y_scale 85 21 | window_number_of_row 128 22 | window_click_options 1 0 1 1 1 0 23 | window_click_info 0 1536415172456 1538138010346 35 1537276591401 24 | window_expanded false 25 | window_open false 26 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Useful}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } } 27 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 28 | window_analyzer_executed 0 29 | window_analyzer_info 0.000000 0.000000 0 0 30 | -------------------------------------------------------------------------------- /paraver-cfgs/general/views/useful_duration.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Useful Duration > 7 | ################################################################################ 8 | window_name Useful Duration 9 | window_type single 10 | window_id 1 11 | window_position_x 501 12 | window_position_y 37 13 | window_width 600 14 | window_height 242 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_color_mode window_in_null_gradient_mode 19 | window_logical_filtered true 20 | window_physical_filtered true 21 | window_comm_fromto true 22 | window_comm_tagsize true 23 | window_comm_typeval true 24 | window_units Microseconds 25 | window_maximum_y 1505618.213999999900 26 | window_minimum_y 4.928000000000 27 | window_compute_y_max false 28 | window_level thread 
29 | window_scale_relative 1.000000000000 30 | window_end_time_relative 1.000000000000 31 | window_object appl { 1, { All } } 32 | window_begin_time_relative 0.000000000000 33 | window_open true 34 | window_drawmode 3 35 | window_drawmode_rows 4 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, State Record Dur.}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, =}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | window_semantic_module thread State Record Dur. { 1, { 1 1.000000000000 } } 39 | 40 | -------------------------------------------------------------------------------- /paraver-cfgs/general/views/user_functions.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | begin_description 4 | Color identifies user function being executed by each thread 5 | end_description 6 | 7 | ################################################################################ 8 | < NEW DISPLAYING WINDOW User function x thread > 9 | ################################################################################ 10 | window_name User function x thread 11 | window_type single 12 | window_id 1 13 | window_position_x 375 14 | window_position_y 84 15 | window_width 600 16 | window_height 114 17 | window_comm_lines_enabled false 18 | window_compute_y_max 19 | window_minimum_y 10.000000 20 | window_scale_relative 1.000000 21 | window_object appl { 1, { All } } 22 | window_begin_time_relative 0.000000000000 23 | window_pos_to_disp 597 24 | window_pos_of_x_scale 18 25 | window_pos_of_y_scale 75 26 | window_number_of_row 9 27 | window_click_options 
1 0 1 0 0 0 28 | window_click_info 1 21272293602 23774916378 4 22523604990 29 | window_expanded false 30 | window_open false 31 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 32 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 33 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 34 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 35 | window_analyzer_executed 0 36 | window_analyzer_info 0.000000 0.000000 0 0 37 | window_filter_module evt_type 1 60000019 38 | -------------------------------------------------------------------------------- /paraver-cfgs/general/views/user_functions_duration.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW User function duration > 6 | ################################################################################ 7 | window_name User function duration 8 | window_type single 9 | window_id 1 10 | window_position_x 381 11 | window_position_y 257 12 | window_width 601 13 | window_height 129 14 | window_comm_lines_enabled false 15 | window_color_mode window_in_null_gradient_mode 16 | window_maximum_y 99503731.571000 17 | window_level task 18 | window_scale_relative 1.000000 19 | window_object appl { 1, { All } } 20 | window_begin_time_relative 0.000000000000 21 | window_pos_to_disp 599 22 | window_pos_of_x_scale 18 23 | window_pos_of_y_scale 80 24 | 
window_number_of_row 64 25 | window_click_options 1 0 1 0 0 0 26 | window_click_info 0 8322982511 11007815579 5 9665399045 27 | window_expanded false 28 | window_open false 29 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Int. Between Evt}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 30 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 31 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 32 | window_compose_functions { 2, { {compose1, As Is}, {compose2, As Is} } } 33 | window_semantic_module compose1 Prod { 1, { 1 1.000000 } } 34 | window_semantic_module compose2 Prod { 1, { 1 1.000000 } } 35 | window_analyzer_executed 0 36 | window_analyzer_info 0.000000 0.000000 0 0 37 | window_filter_module evt_type 1 60000019 38 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/active_set.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW Active counter set > 6 | ################################################################################ 7 | window_name Active counter set 8 | window_type single 9 | window_id 1 10 | window_position_x 105 11 | window_position_y 373 12 | window_width 600 13 | window_height 134 14 | window_physical_filtered true 15 | window_comm_lines_enabled false 16 | window_maximum_y 9223372036854775808.000000 17 | window_minimum_y 1.000000 18 | window_scale_relative 1.000000 19 | window_object appl { 1, { All } } 20 | window_begin_time_relative 0.000000000000 21 | window_pos_to_disp 598 22 | window_pos_of_x_scale 18 23 | window_pos_of_y_scale 140 24 | window_number_of_row 16 25 | 
window_click_options 1 0 1 0 1 0 26 | window_click_info 1 33226631649 35583130347 11 34404880998 27 | window_expanded false 28 | window_open false 29 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, =}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 30 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 31 | window_analyzer_executed 0 32 | window_analyzer_info 0.000000 0.000000 0 0 33 | window_filter_module evt_type 1 42009999 34 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/architecture/L3_misses.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW L3 cache misses > 6 | ################################################################################ 7 | window_name L3 cache misses 8 | window_type single 9 | window_id 1 10 | window_position_x 408 11 | window_position_y 181 12 | window_width 600 13 | window_height 114 14 | window_comm_lines_enabled false 15 | window_color_mode window_in_null_gradient_mode 16 | window_maximum_y 240505.000000 17 | window_minimum_y 1.000000 18 | window_scale_relative 1.000000 19 | window_object appl { 1, { All } } 20 | window_begin_time_relative 0.000000000000 21 | window_pos_to_disp 597 22 | window_pos_of_x_scale 18 23 | window_pos_of_y_scale 80 24 | window_number_of_row 32 25 | window_click_options 1 0 1 0 0 0 26 | window_click_info 1 1151798560 1190191844 23 1170995202 27 | window_expanded false 28 | window_open false 
29 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 30 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 31 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 32 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } } 33 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } } 34 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 35 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } } 36 | window_analyzer_executed 0 37 | window_analyzer_info 0.000000 0.000000 0 0 38 | window_filter_module evt_type 1 42000008 39 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/architecture/loaded_bytes.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Loaded Bytes > 7 | ################################################################################ 8 | window_name Loaded Bytes 9 | window_type single 10 | window_id 1 11 | window_position_x 346 12 | window_position_y 58 13 | window_width 600 14 | window_height 114 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_color_mode window_in_null_gradient_mode 19 | window_logical_filtered true 20 | window_physical_filtered false 21 | window_comm_fromto true 22 | window_comm_tagsize true 23 | 
window_comm_typeval true 24 | window_units Microseconds 25 | window_maximum_y 210450432.000000000000 26 | window_minimum_y 128.000000000000 27 | window_compute_y_max false 28 | window_level thread 29 | window_scale_relative 0.999954755396 30 | window_end_time_relative 1.000000000000 31 | window_object appl { 1, { All } } 32 | window_begin_time_relative 0.000000000000 33 | window_open false 34 | window_drawmode 1 35 | window_drawmode_rows 1 36 | window_pixel_size 1 37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Prod}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 39 | window_semantic_module compose_thread Prod { 1, { 1 128.000000000000 } } 40 | window_filter_module evt_type 1 42000002 41 | 42 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/architecture/useful_loadad_bytes.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Useful Loaded Bytes > 7 | ################################################################################ 8 | window_name Useful Loaded Bytes 9 | window_type single 10 | window_id 1 11 | window_position_x 627 12 | window_position_y 283 13 | window_width 600 14 | window_height 114 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_color_mode window_in_null_gradient_mode 19 | 
window_logical_filtered true 20 | window_physical_filtered false 21 | window_comm_fromto true 22 | window_comm_tagsize true 23 | window_comm_typeval true 24 | window_units Microseconds 25 | window_maximum_y 210450432.000000000000 26 | window_minimum_y 128.000000000000 27 | window_compute_y_max false 28 | window_level thread 29 | window_scale_relative 1.000000000000 30 | window_end_time_relative 1.000000000000 31 | window_object appl { 1, { All } } 32 | window_begin_time_relative 0.000000000000 33 | window_open false 34 | window_drawmode 1 35 | window_drawmode_rows 1 36 | window_pixel_size 1 37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Prod}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 39 | window_semantic_module compose_thread Prod { 1, { 1 128.000000000000 } } 40 | window_filter_module evt_type 1 42000002 41 | 42 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/mx_counters/nb_medium_msgs_sent.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW nb medium msgs sent > 6 | ################################################################################ 7 | window_name nb medium msgs sent 8 | window_type single 9 | window_id 1 10 | window_position_x 293 11 | window_position_y 243 12 | window_width 600 13 | window_height 114 14 | window_comm_lines_enabled false 15 | window_color_mode 
window_in_null_gradient_mode 16 | window_maximum_y 1600.000000 17 | window_scale_relative 0.108983 18 | window_object appl { 1, { All } } 19 | window_begin_time_relative 0.000000000000 20 | window_pos_to_disp 598 21 | window_pos_of_x_scale 18 22 | window_pos_of_y_scale 80 23 | window_number_of_row 32 24 | window_click_options 0 0 1 0 0 0 25 | window_click_info 0 4628104571 5064718209 20 4846411390 26 | window_expanded false 27 | window_open false 28 | window_drawmode 0 29 | window_drawmode_rows 0 30 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 31 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 32 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 33 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } } 34 | window_analyzer_executed 0 35 | window_analyzer_info 0.000000 0.000000 0 0 36 | window_filter_module evt_type 1 11057 37 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/mx_counters/nb_rndv_msgs_sent.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW nb rndv msgs sent > 6 | ################################################################################ 7 | window_name nb rndv msgs sent 8 | window_type single 9 | window_id 1 10 | window_position_x 293 11 | window_position_y 243 12 | window_width 600 13 | window_height 114 14 | window_comm_lines_enabled false 15 | 
window_flags_enabled true 16 | window_color_mode window_in_null_gradient_mode 17 | window_maximum_y 1600.000000 18 | window_scale_relative 1.000000 19 | window_object appl { 1, { All } } 20 | window_begin_time_relative 0.000000000000 21 | window_pos_to_disp 597 22 | window_pos_of_x_scale 18 23 | window_pos_of_y_scale 80 24 | window_number_of_row 32 25 | window_click_options 0 0 1 0 0 0 26 | window_click_info 1 2613055805 2618823827 18 2615939816 27 | window_expanded false 28 | window_open false 29 | window_drawmode 0 30 | window_drawmode_rows 0 31 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 32 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 33 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 34 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } } 35 | window_analyzer_executed 0 36 | window_analyzer_info 0.000000 0.000000 0 0 37 | window_filter_module evt_type 1 11058 38 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/mx_counters/nb_small_msgs_sent.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW nb small msgs sent > 6 | ################################################################################ 7 | window_name nb small msgs sent 8 | window_type single 9 | window_id 1 10 | window_position_x 293 11 | window_position_y 243 12 | window_width 600 13 | 
window_height 114 14 | window_comm_lines_enabled false 15 | window_color_mode window_in_null_gradient_mode 16 | window_maximum_y 56357.000000 17 | window_minimum_y 23406.000000 18 | window_scale_relative 0.108983 19 | window_object appl { 1, { All } } 20 | window_begin_time_relative 0.000000000000 21 | window_pos_to_disp 598 22 | window_pos_of_x_scale 18 23 | window_pos_of_y_scale 80 24 | window_number_of_row 32 25 | window_click_options 0 0 1 0 0 0 26 | window_click_info 0 4628104571 5064718209 20 4846411390 27 | window_expanded false 28 | window_open false 29 | window_drawmode 0 30 | window_drawmode_rows 0 31 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 32 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 33 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 34 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } } 35 | window_analyzer_executed 0 36 | window_analyzer_info 0.000000 0.000000 0 0 37 | window_filter_module evt_type 1 11056 38 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/mx_counters/route_dispersion.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW Route dispersion (Port 0) > 6 | ################################################################################ 7 | window_name Route dispersion (Port 0) 8 | window_type single 9 | window_id 1 10 | 
window_position_x 393 11 | window_position_y 304 12 | window_width 600 13 | window_height 114 14 | window_comm_lines_enabled false 15 | window_color_mode window_in_null_gradient_mode 16 | window_maximum_y 56357.000000 17 | window_minimum_y 23406.000000 18 | window_scale_relative 1.019876 19 | window_object appl { 1, { All } } 20 | window_begin_time_relative 0.000000000000 21 | window_pos_to_disp 584 22 | window_pos_of_x_scale 18 23 | window_pos_of_y_scale 80 24 | window_number_of_row 32 25 | window_click_options 0 0 1 0 0 0 26 | window_click_info 1 4628104571 5064718209 20 4846411390 27 | window_expanded false 28 | window_open false 29 | window_drawmode 0 30 | window_drawmode_rows 0 31 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 32 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 33 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 34 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } } 35 | window_analyzer_executed 0 36 | window_analyzer_info 0.000000 0.000000 0 0 37 | window_filter_module evt_type 1 11076 38 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/performance/MFLOPS.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW MFLOPs > 6 | ################################################################################ 7 | window_name MFLOPs 8 | window_type single 9 | 
window_id 1 10 | window_position_x 393 11 | window_position_y 280 12 | window_width 600 13 | window_height 114 14 | window_comm_lines_enabled false 15 | window_color_mode window_in_null_gradient_mode 16 | window_maximum_y 714.715481 17 | window_minimum_y 0.011034 18 | window_scale_relative 1.000000 19 | window_object appl { 1, { All } } 20 | window_begin_time_relative 0.000000000000 21 | window_pos_to_disp 598 22 | window_pos_of_x_scale 18 23 | window_pos_of_y_scale 80 24 | window_number_of_row 32 25 | window_click_options 1 0 1 1 1 0 26 | window_click_info 1 154474143090 154504805486 1 154489474288 27 | window_expanded false 28 | window_open false 29 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Avg Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 30 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 31 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } } 32 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } } 33 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 34 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } } 35 | window_semantic_module topcompose2 Prod { 1, { 1 1.000000 } } 36 | window_analyzer_executed 0 37 | window_analyzer_info 0.000000 0.000000 0 0 38 | window_filter_module evt_type 1 42000052 39 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/performance/MFMAS.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | 
################################################################################ 5 | < NEW DISPLAYING WINDOW FMAs per microsecond > 6 | ################################################################################ 7 | window_name FMAs per microsecond 8 | window_type single 9 | window_id 1 10 | window_position_x 369 11 | window_position_y 201 12 | window_width 600 13 | window_height 114 14 | window_color_mode window_in_null_gradient_mode 15 | window_maximum_y 2893.535012 16 | window_minimum_y 0.003315 17 | window_scale_relative 0.994231 18 | window_object appl { 1, { All } } 19 | window_begin_time_relative 0.000000000000 20 | window_pos_to_disp 598 21 | window_pos_of_x_scale 18 22 | window_pos_of_y_scale 80 23 | window_number_of_row 28 24 | window_click_options 0 0 1 0 0 0 25 | window_click_info 1 19640173623 20816231923 7 20228202773 26 | window_expanded false 27 | window_open false 28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Avg Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 29 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 30 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } } 31 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } } 32 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 33 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } } 34 | window_semantic_module topcompose2 Prod { 1, { 1 1.000000 } } 35 | window_analyzer_executed 0 36 | window_analyzer_info 0.000000 0.000000 0 0 37 | window_filter_module evt_type 1 42000048 38 | 
-------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/performance/MLoadS.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW Loads per microsecond > 6 | ################################################################################ 7 | window_name Loads per microsecond 8 | window_type single 9 | window_id 1 10 | window_position_x 131 11 | window_position_y 189 12 | window_width 600 13 | window_height 114 14 | window_color_mode window_in_null_gradient_mode 15 | window_maximum_y 2893.535012 16 | window_minimum_y 0.003315 17 | window_scale_relative 0.994231 18 | window_object appl { 1, { All } } 19 | window_begin_time_relative 0.000000000000 20 | window_pos_to_disp 599 21 | window_pos_of_x_scale 18 22 | window_pos_of_y_scale 80 23 | window_number_of_row 28 24 | window_click_options 0 0 1 0 0 0 25 | window_click_info 1 28040848529 29180720419 24 28610784474 26 | window_expanded false 27 | window_open false 28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Avg Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 29 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 30 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } } 31 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } } 32 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 33 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } } 34 
| window_semantic_module topcompose2 Prod { 1, { 1 1.000000 } } 35 | window_analyzer_executed 0 36 | window_analyzer_info 0.000000 0.000000 0 0 37 | window_filter_module evt_type 1 42000053 38 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/performance/NoIssue_cycles_per_us.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW No issue cycles per microsecond > 6 | ################################################################################ 7 | window_name No issue cycles per microsecond 8 | window_type single 9 | window_id 1 10 | window_position_x 384 11 | window_position_y 103 12 | window_width 600 13 | window_height 114 14 | window_color_mode window_in_null_gradient_mode 15 | window_maximum_y 5.584969 16 | window_minimum_y 0.003315 17 | window_scale_relative 1.000000 18 | window_object appl { 1, { All } } 19 | window_begin_time_relative 0.000000000000 20 | window_pos_to_disp 597 21 | window_pos_of_x_scale 18 22 | window_pos_of_y_scale 80 23 | window_number_of_row 28 24 | window_click_options 1 0 1 1 1 0 25 | window_click_info 1 3829117335 3945427411 8 3887272373 26 | window_expanded false 27 | window_open false 28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Avg Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 29 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 30 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } } 31 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } } 32 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As 
Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 33 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } } 34 | window_semantic_module topcompose2 Prod { 1, { 1 1.000000 } } 35 | window_analyzer_executed 1 36 | window_analyzer_info 0.000000 285780830000.000000 1 74 37 | window_filter_module evt_type 1 42000037 38 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/performance/cycles_per_us.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW cycles per us > 6 | ################################################################################ 7 | window_name cycles per us 8 | window_type single 9 | window_id 1 10 | window_position_x 312 11 | window_position_y 336 12 | window_width 600 13 | window_height 114 14 | window_color_mode window_in_null_gradient_mode 15 | window_maximum_y 2400.000000 16 | window_scale_relative 1.000000 17 | window_object appl { 1, { All } } 18 | window_begin_time_relative 0.000000000000 19 | window_pos_to_disp 598 20 | window_pos_of_x_scale 18 21 | window_pos_of_y_scale 80 22 | window_number_of_row 64 23 | window_click_options 1 0 1 1 1 0 24 | window_click_info 0 719698 719868 2 719698 25 | window_expanded false 26 | window_open false 27 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Avg Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 28 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 29 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } } 30 | 
window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } } 31 | window_compose_functions { 2, { {compose1, As Is}, {compose2, As Is} } } 32 | window_semantic_module compose1 Prod { 1, { 1 1.000000 } } 33 | window_semantic_module compose2 Prod { 1, { 1 1.000000 } } 34 | window_analyzer_executed 0 35 | window_analyzer_info 0.000000 0.000000 0 0 36 | window_filter_module evt_type 1 42000059 37 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/program/Load_stores.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW Load/stores > 6 | ################################################################################ 7 | window_name Load/stores 8 | window_type single 9 | window_id 1 10 | window_position_x 349 11 | window_position_y 262 12 | window_width 600 13 | window_height 114 14 | window_color_mode window_in_null_gradient_mode 15 | window_maximum_y 85729243176.000000 16 | window_scale_relative 1.081834 17 | window_object appl { 1, { All } } 18 | window_begin_time_relative 0.000000000000 19 | window_pos_to_disp 558 20 | window_pos_of_x_scale 18 21 | window_pos_of_y_scale 80 22 | window_number_of_row 64 23 | window_click_options 1 0 1 1 1 0 24 | window_click_info 0 719698 719868 2 719698 25 | window_expanded false 26 | window_open false 27 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 28 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 29 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 30 | window_semantic_module thread Send BandWidth { 1, 
{ 1 1024.000000 } } 31 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } } 32 | window_compose_functions { 2, { {compose1, As Is}, {compose2, As Is} } } 33 | window_semantic_module compose1 Prod { 1, { 1 1.000000 } } 34 | window_semantic_module compose2 Prod { 1, { 1 1.000000 } } 35 | window_analyzer_executed 0 36 | window_analyzer_info 0.000000 0.000000 0 0 37 | window_filter_module evt_type 1 42000060 38 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/program/Loads.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW Loads > 6 | ################################################################################ 7 | window_name Loads 8 | window_type single 9 | window_id 1 10 | window_position_x 354 11 | window_position_y 284 12 | window_width 600 13 | window_height 114 14 | window_comm_lines_enabled false 15 | window_color_mode window_in_null_gradient_mode 16 | window_maximum_y 2551665770.000000 17 | window_scale_relative 1.109738 18 | window_object appl { 1, { All } } 19 | window_begin_time_relative 0.000000000000 20 | window_pos_to_disp 546 21 | window_pos_of_x_scale 18 22 | window_pos_of_y_scale 80 23 | window_number_of_row 32 24 | window_click_options 1 0 1 1 1 0 25 | window_click_info 0 719698 719868 2 719698 26 | window_expanded false 27 | window_open false 28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 29 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 30 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 31 | 
window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } } 32 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } } 33 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } } 34 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } } 35 | window_semantic_module topcompose Prod { 1, { 1 1.000000 } } 36 | window_analyzer_executed 0 37 | window_analyzer_info 0.000000 0.000000 0 0 38 | window_filter_module evt_type 1 42000053 39 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/program/Stores.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW Stores > 6 | ################################################################################ 7 | window_name Stores 8 | window_type single 9 | window_id 1 10 | window_position_x 354 11 | window_position_y 284 12 | window_width 600 13 | window_height 114 14 | window_comm_lines_enabled false 15 | window_color_mode window_in_null_gradient_mode 16 | window_maximum_y 2551665770.000000 17 | window_scale_relative 1.109738 18 | window_object appl { 1, { All } } 19 | window_begin_time_relative 0.000000000000 20 | window_pos_to_disp 546 21 | window_pos_of_x_scale 18 22 | window_pos_of_y_scale 80 23 | window_number_of_row 32 24 | window_click_options 1 0 1 1 1 0 25 | window_click_info 0 719698 719868 2 719698 26 | window_expanded false 27 | window_open false 28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, 
{bw_msg, All}, {evt_type, =}, {evt_value, All} } } 29 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 30 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 31 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } } 32 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } } 33 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } } 34 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } } 35 | window_semantic_module topcompose Prod { 1, { 1 1.000000 } } 36 | window_analyzer_executed 0 37 | window_analyzer_info 0.000000 0.000000 0 0 38 | window_filter_module evt_type 1 42000054 39 | -------------------------------------------------------------------------------- /paraver-cfgs/hwc/papi/program/instructions.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW Instructions > 6 | ################################################################################ 7 | window_name Instructions 8 | window_type single 9 | window_id 1 10 | window_position_x 241 11 | window_position_y 462 12 | window_width 600 13 | window_height 114 14 | window_comm_lines_enabled false 15 | window_color_mode window_in_null_gradient_mode 16 | window_maximum_y 451758142613.000000 17 | window_scale_relative 1.117997 18 | window_object appl { 1, { All } } 19 | window_begin_time_relative 0.000000000000 20 | window_pos_to_disp 543 21 | window_pos_of_x_scale 18 22 | window_pos_of_y_scale 80 23 | window_number_of_row 64 24 | window_click_options 1 0 1 1 1 0 25 | window_click_info 0 719698 719868 2 719698 26 | window_expanded false 27 | window_open false 28 | 
window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 29 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 30 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 31 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } } 32 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } } 33 | window_compose_functions { 2, { {compose1, As Is}, {compose2, As Is} } } 34 | window_semantic_module compose1 Prod { 1, { 1 1.000000 } } 35 | window_semantic_module compose2 Prod { 1, { 1 1.000000 } } 36 | window_analyzer_executed 0 37 | window_analyzer_info 0.000000 0.000000 0 0 38 | window_filter_module evt_type 1 42000050 39 | -------------------------------------------------------------------------------- /paraver-cfgs/mpi/sanity_checks/backward_msgs.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | begin_description 4 | Number of messages coming to a node from the future 5 | 6 | end_description 7 | 8 | ################################################################################ 9 | < NEW DISPLAYING WINDOW Incoming backward msgs > 10 | ################################################################################ 11 | window_name Incoming backward msgs 12 | window_type single 13 | window_id 1 14 | window_position_x 232 15 | window_position_y 98 16 | window_width 600 17 | window_height 671 18 | window_maximum_y 70.000000 19 | window_scale_relative 0.988383 20 | window_object appl { 1, { All } } 21 | window_begin_time_relative 0.000000000000 22 | window_pos_to_disp 598 23 | window_pos_of_x_scale 18 24 | window_pos_of_y_scale 85 25 | window_number_of_row 128 26 | window_click_options 1 0 1 0 0 0 27 | 
window_click_info 1 930017142 931047722 46 930532432 28 | window_expanded false 29 | window_open false 30 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Recv Negative Messages}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, <}, {evt_type, =}, {evt_value, All} } } 31 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 32 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 33 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } } 34 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } } 35 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 36 | window_analyzer_executed 1 37 | window_analyzer_info 0.000000 10705615358.000000 1 96 38 | window_filter_module bw_msg 1 0.000000 39 | window_filter_module evt_type 1 50000001 40 | -------------------------------------------------------------------------------- /paraver-cfgs/mpi/views/MPI_call.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | begin_description 4 | Color identifies MPI call 5 | Light blue: outside MPI 6 | 7 | 8 | 9 | 10 | 11 | end_description 12 | 13 | ################################################################################ 14 | < NEW DISPLAYING WINDOW MPI call > 15 | ################################################################################ 16 | window_name MPI call 17 | window_type single 18 | window_id 1 19 | window_position_x 336 20 | window_position_y 153 21 | window_width 600 22 | window_height 114 23 | window_comm_lines_enabled false 24 | window_maximum_y 115.000000 25 | window_minimum_y 2.000000 26 | 
window_scale_relative 1.028112 27 | window_object appl { 1, { All } } 28 | window_begin_time_relative 0.000000000000 29 | window_pos_to_disp 580 30 | window_pos_of_x_scale 18 31 | window_pos_of_y_scale 115 32 | window_number_of_row 16 33 | window_click_options 1 0 1 0 0 0 34 | window_click_info 1 32283334120 32309105806 0 32296219963 35 | window_expanded false 36 | window_open false 37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, [x,y]}, {evt_value, All} } } 38 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 39 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 40 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 41 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } } 42 | window_semantic_module topcompose2 Prod { 1, { 1 1.000000 } } 43 | window_analyzer_executed 0 44 | window_analyzer_info 0.000000 0.000000 0 0 45 | window_filter_module evt_type 2 50000001 50000003 46 | -------------------------------------------------------------------------------- /paraver-cfgs/mpi/views/advanced/total_bytes_in_transit.cfg: -------------------------------------------------------------------------------- 1 | version 3.3 2 | number_of_windows 1 3 | begin_description 4 | Total number of bytes in transit. 
5 | end_description 6 | 7 | ################################################################################ 8 | < NEW DISPLAYING WINDOW total bytes in transit > 9 | ################################################################################ 10 | window_name total bytes in transit 11 | window_type single 12 | window_id 1 13 | window_position_x 404 14 | window_position_y 517 15 | window_width 600 16 | window_height 140 17 | window_comm_lines_enabled false 18 | window_noncolor_mode false 19 | window_color_mode window_in_null_gradient_mode 20 | window_maximum_y 2723208.000000 21 | window_level appl 22 | window_scale_relative 0.117784 23 | window_object appl { 1, { 1 } } 24 | window_begin_time_relative 0.000000000000 25 | window_pos_to_disp 598 26 | window_pos_of_x_scale 18 27 | window_pos_of_y_scale 85 28 | window_number_of_row 1 29 | window_click_options 1 0 1 0 0 0 30 | window_click_info 1 13819603 14039945 0 13929774 31 | window_expanded false 32 | window_open false 33 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Recv Bytes in Transit}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } } 34 | window_compose_functions { 2, { {compose1, As Is}, {compose2, As Is} } } 35 | window_analyzer_executed 0 36 | window_analyzer_info 0.000000 0.000000 0 0 37 | -------------------------------------------------------------------------------- /paraver-cfgs/mpi/views/collectives/MPI_collective_call.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | begin_description 4 | Color identifies MPI collective call 5 | Light blue: outside MPI collective 6 | 7 | 8 | 9 | 10 | 11 | end_description 12 | 13 | ################################################################################ 14 | < NEW DISPLAYING WINDOW MPI collective call > 15 | 
################################################################################ 16 | window_name MPI collective call 17 | window_type single 18 | window_id 1 19 | window_position_x 262 20 | window_position_y 73 21 | window_width 600 22 | window_height 114 23 | window_comm_lines_enabled false 24 | window_maximum_y 70.000000 25 | window_scale_relative 1.000000 26 | window_object appl { 1, { All } } 27 | window_begin_time_relative 0.000000000000 28 | window_pos_to_disp 597 29 | window_pos_of_x_scale 18 30 | window_pos_of_y_scale 115 31 | window_number_of_row 32 32 | window_click_options 1 0 1 0 0 0 33 | window_click_info 1 32283334120 32309105806 0 32296219963 34 | window_expanded false 35 | window_open false 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 38 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 39 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 40 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } } 41 | window_analyzer_executed 0 42 | window_analyzer_info 0.000000 0.000000 0 0 43 | window_filter_module evt_type 1 50000002 44 | -------------------------------------------------------------------------------- /paraver-cfgs/mpi/views/collectives/collective_root.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING 
WINDOW Collective root > 7 | ################################################################################ 8 | window_name Collective root 9 | window_type single 10 | window_id 1 11 | window_position_x 444 12 | window_position_y 481 13 | window_width 600 14 | window_height 114 15 | window_comm_lines_enabled false 16 | window_flags_enabled true 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 91.000000000000 25 | window_minimum_y 0.000000000000 26 | window_compute_y_max false 27 | window_level thread 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open true 33 | window_drawmode 1 34 | window_drawmode_rows 1 35 | window_pixel_size 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | window_semantic_module task Thread i { 1, { 1 0.000000000000 } } 39 | window_filter_module evt_type 1 50100003 40 | 41 | -------------------------------------------------------------------------------- /paraver-cfgs/mpi/views/collectives/outside_collective.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW 
DISPLAYING WINDOW Outside MPI collectives > 6 | ################################################################################ 7 | window_name Outside MPI collectives 8 | window_type single 9 | window_id 1 10 | window_position_x 310 11 | window_position_y 203 12 | window_width 600 13 | window_height 134 14 | window_comm_lines_enabled false 15 | window_maximum_y 70.000000 16 | window_scale_relative 1.000000 17 | window_object appl { 1, { All } } 18 | window_begin_time_relative 0.000000000000 19 | window_pos_to_disp 597 20 | window_pos_of_x_scale 18 21 | window_pos_of_y_scale 120 22 | window_number_of_row 128 23 | window_click_options 1 0 1 0 0 0 24 | window_click_info 0 32283334120 32309105806 0 32296219963 25 | window_expanded false 26 | window_open false 27 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 28 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 29 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 30 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, 1-Sign}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 31 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } } 32 | window_semantic_module topcompose2 Prod { 1, { 1 1.000000 } } 33 | window_analyzer_executed 0 34 | window_analyzer_info 0.000000 0.000000 0 0 35 | window_filter_module evt_type 1 50000002 36 | -------------------------------------------------------------------------------- /paraver-cfgs/mpi/views/point2point/In_MPI_send_pt2pt_call.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | 
ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW In MPI send p2p call > 7 | ################################################################################ 8 | window_name In MPI send p2p call 9 | window_type single 10 | window_id 1 11 | window_position_x 420 12 | window_position_y 205 13 | window_width 600 14 | window_height 147 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 70.000000000000 25 | window_minimum_y 0.000000000000 26 | window_compute_y_max false 27 | window_level thread 28 | window_scale_relative 1.000001000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open true 33 | window_drawmode 0 34 | window_drawmode_rows 0 35 | window_pixel_size 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Is Equal (Sign)}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | window_semantic_module task Thread i { 1, { 1 0.000000000000 } } 39 | window_semantic_module compose_thread Is Equal (Sign) { 1, { 5 1.000000000000 3.000000000000 34.000000000000 41.000000000000 42.000000000000 } } 40 | window_filter_module evt_type 1 50000001 41 | 42 | 
-------------------------------------------------------------------------------- /paraver-cfgs/mpi/views/point2point/p2p_bytes_sent.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW p2p bytes sent > 7 | ################################################################################ 8 | window_name p2p bytes sent 9 | window_type single 10 | window_id 1 11 | window_position_x 590 12 | window_position_y 596 13 | window_width 592 14 | window_height 302 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_color_mode window_in_null_gradient_mode 19 | window_logical_filtered true 20 | window_physical_filtered false 21 | window_comm_fromto true 22 | window_comm_tagsize true 23 | window_comm_typeval true 24 | window_units Microseconds 25 | window_maximum_y 13421772.000000 26 | window_minimum_y 0.000000 27 | window_compute_y_max false 28 | window_level thread 29 | window_scale_relative 1.000000 30 | window_end_time_relative 1.000000000000 31 | window_object appl { 1, { All } } 32 | window_begin_time_relative 0.000000000000 33 | window_open false 34 | window_drawmode 1 35 | window_drawmode_rows 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Event Sent Bytes}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | window_semantic_module task Thread i { 1, { 1 0.000000 } } 39 | 
window_filter_module evt_type 1 50000001 40 | 41 | -------------------------------------------------------------------------------- /paraver-cfgs/mpi/views/point2point/p2p_send_size.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Bytes sent btw events > 7 | ################################################################################ 8 | window_name Bytes sent btw events 9 | window_type single 10 | window_id 1 11 | window_position_x 392 12 | window_position_y 419 13 | window_width 600 14 | window_height 244 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_color_mode window_in_null_gradient_mode 19 | window_logical_filtered true 20 | window_physical_filtered false 21 | window_comm_fromto true 22 | window_comm_tagsize true 23 | window_comm_typeval true 24 | window_units Microseconds 25 | window_maximum_y 4181568.000000 26 | window_minimum_y 0.000000 27 | window_compute_y_max false 28 | window_level thread 29 | window_scale_relative 1.000000 30 | window_end_time_relative 1.000000000000 31 | window_object appl { 1, { All } } 32 | window_begin_time_relative 0.000000000000 33 | window_open false 34 | window_drawmode 1 35 | window_drawmode_rows 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Event Sent Bytes}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | 
window_semantic_module task Thread i { 1, { 1 0.000000 } } 39 | window_filter_module evt_type 1 50000001 40 | 41 | -------------------------------------------------------------------------------- /paraver-cfgs/mpi/views/point2point/p2p_size.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | begin_description 4 | Bytes sent/received within point to point calls 5 | end_description 6 | 7 | ################################################################################ 8 | < NEW DISPLAYING WINDOW Bytes btw events > 9 | ################################################################################ 10 | window_name Bytes btw events 11 | window_type single 12 | window_id 1 13 | window_position_x 371 14 | window_position_y 449 15 | window_width 600 16 | window_height 114 17 | window_comm_lines_enabled false 18 | window_color_mode window_in_null_gradient_mode 19 | window_compute_y_max 20 | window_scale_relative 1.000000 21 | window_object appl { 1, { All } } 22 | window_begin_time_relative 0.000000000000 23 | window_pos_to_disp 598 24 | window_pos_of_x_scale 18 25 | window_pos_of_y_scale 80 26 | window_number_of_row 32 27 | window_click_options 1 0 1 0 0 0 28 | window_click_info 1 7938040900 8210825810 26 8074433355 29 | window_expanded false 30 | window_open false 31 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Event Bytes}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 32 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 33 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 34 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As 
Is}, {topcompose, As Is} } } 35 | window_analyzer_executed 0 36 | window_analyzer_info 0.000000 0.000000 0 0 37 | window_filter_module evt_type 1 50000001 38 | -------------------------------------------------------------------------------- /paraver-cfgs/mpi/views/point2point/total_bw.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Total Send bandwidth (MB/s) > 7 | ################################################################################ 8 | window_name Total Send bandwidth (MB/s) 9 | window_type single 10 | window_id 1 11 | window_position_x 596 12 | window_position_y 190 13 | window_width 600 14 | window_height 143 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode false 18 | window_color_mode window_in_null_gradient_mode 19 | window_logical_filtered true 20 | window_physical_filtered false 21 | window_comm_fromto true 22 | window_comm_tagsize true 23 | window_comm_typeval true 24 | window_units Microseconds 25 | window_maximum_y 10.400097390318 26 | window_minimum_y 0.000000000000 27 | window_compute_y_max false 28 | window_level appl 29 | window_scale_relative 1.000000000000 30 | window_end_time_relative 1.000000000000 31 | window_object appl { 1, { 1 } } 32 | window_begin_time_relative 0.000000000000 33 | window_open true 34 | window_drawmode 1 35 | window_drawmode_rows 1 36 | window_pixel_size 1 37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Send BandWidth}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } } 38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, 
As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 39 | window_semantic_module thread Send BandWidth { 1, { 1 1.000000000000 } } 40 | 41 | -------------------------------------------------------------------------------- /paraver-cfgs/mpi/views/point2point/total_bytes_btw_events.cfg: -------------------------------------------------------------------------------- 1 | version 3.4 2 | number_of_windows 1 3 | 4 | ################################################################################ 5 | < NEW DISPLAYING WINDOW Total bytes btw events > 6 | ################################################################################ 7 | window_name Total bytes btw events 8 | window_type single 9 | window_id 1 10 | window_position_x 529 11 | window_position_y 374 12 | window_width 600 13 | window_height 302 14 | window_comm_lines_enabled false 15 | window_noncolor_mode false 16 | window_color_mode window_in_null_gradient_mode 17 | window_maximum_y 441440.000000 18 | window_minimum_y 320.000000 19 | window_level appl 20 | window_scale_relative 1.000000 21 | window_object appl { 1, { 1 } } 22 | window_begin_time_relative 0.000000000000 23 | window_pos_to_disp 597 24 | window_pos_of_x_scale 18 25 | window_pos_of_y_scale 115 26 | window_number_of_row 1 27 | window_click_options 1 0 1 1 0 0 28 | window_click_info 1 31861509 31863359 0 31862434 29 | window_expanded false 30 | window_open false 31 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Event Bytes}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 32 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } } 33 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } } 34 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As 
Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 35 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } } 36 | window_analyzer_executed 0 37 | window_analyzer_info 0.000000 0.000000 0 0 38 | window_filter_module evt_type 1 50000001 39 | -------------------------------------------------------------------------------- /paraver-cfgs/mpi/views/point2point/total_msgs_in_transit.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Total msgs in transit > 7 | ################################################################################ 8 | window_name Total msgs in transit 9 | window_type single 10 | window_id 1 11 | window_position_x 406 12 | window_position_y 60 13 | window_width 600 14 | window_height 143 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode false 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 20.000000000000 25 | window_minimum_y 1.000000000000 26 | window_compute_y_max false 27 | window_level appl 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { 1 } } 31 | window_begin_time_relative 0.000000000000 32 | window_open true 33 | window_drawmode 1 34 | window_drawmode_rows 1 35 | window_pixel_size 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Send Messages in Transit}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, 
{bw_msg, All}, {evt_type, All}, {evt_value, All} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | 39 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/cuda/CUDA_runtime.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW CUDA Runtime > 7 | ################################################################################ 8 | window_name CUDA Runtime 9 | window_type single 10 | window_id 1 11 | window_position_x 487 12 | window_position_y 225 13 | window_width 641 14 | window_height 115 15 | window_comm_lines_enabled true 16 | window_flags_enabled true 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 30.000000000000 25 | window_minimum_y 1.000000000000 26 | window_compute_y_max false 27 | window_level thread 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open false 33 | window_drawmode 1 34 | window_drawmode_rows 1 35 | window_pixel_size 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, None}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, 
{compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | window_filter_module tag_msg 1 1 39 | window_filter_module evt_type 1 9200027 40 | 41 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/data_mgmgt/cache_waiting_for.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Cache waiting for something > 7 | ################################################################################ 8 | window_name Cache waiting for something 9 | window_type single 10 | window_id 1 11 | window_position_x 584 12 | window_position_y 165 13 | window_width 641 14 | window_height 115 15 | window_comm_lines_enabled false 16 | window_flags_enabled true 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 30.000000000000 25 | window_minimum_y 0.000000000000 26 | window_compute_y_max false 27 | window_level thread 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open true 33 | window_drawmode 1 34 | window_drawmode_rows 1 35 | window_pixel_size 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_compose_functions { 9, { 
{compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | window_filter_module evt_type 1 9200029 39 | 40 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/data_mgmgt/data_tx.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | Data transfers between host and devices 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Data Transfers > 7 | ################################################################################ 8 | window_name Data Transfers 9 | window_type single 10 | window_id 1 11 | window_position_x 2317 12 | window_position_y 254 13 | window_width 641 14 | window_height 115 15 | window_comm_lines_enabled true 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 4.000000000000 25 | window_minimum_y 1.000000000000 26 | window_compute_y_max false 27 | window_level thread 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open true 33 | window_drawmode 1 34 | window_drawmode_rows 1 35 | window_pixel_size 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, None}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, 
{compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | window_filter_module tag_msg 1 1 39 | window_filter_module evt_type 1 9200060 40 | 41 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/graph_and_scheduling/creating_submitting_task.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Creating/submitting task > 7 | ################################################################################ 8 | window_name Creating/submitting task 9 | window_type single 10 | window_id 1 11 | window_position_x 650 12 | window_position_y 269 13 | window_width 641 14 | window_height 115 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 27.000000000000 25 | window_minimum_y 2.000000000000 26 | window_compute_y_max false 27 | window_level thread 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open false 33 | window_drawmode 1 34 | window_drawmode_rows 1 35 | window_pixel_size 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, !=}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_compose_functions { 
9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, Select Range} } } 38 | window_semantic_module topcompose2 Select Range { 2, { 1 18.000000000000, 1 17.000000000000 } } 39 | window_filter_module tag_msg 1 1 40 | window_filter_module evt_type 1 9200001 41 | 42 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/graph_and_scheduling/nb_concurrent_ready.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW # concurrent tasks in ready > 7 | ################################################################################ 8 | window_name # concurrent tasks in ready 9 | window_type single 10 | window_id 1 11 | window_position_x 630 12 | window_position_y 277 13 | window_width 826 14 | window_height 398 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode false 18 | window_color_mode window_in_null_gradient_mode 19 | window_logical_filtered true 20 | window_physical_filtered false 21 | window_comm_fromto true 22 | window_comm_tagsize true 23 | window_comm_typeval true 24 | window_units Microseconds 25 | window_maximum_y 158.000000000000 26 | window_minimum_y 1.000000000000 27 | window_compute_y_max true 28 | window_level task 29 | window_scale_relative 1.000000000000 30 | window_end_time_relative 1.000000000000 31 | window_object appl { 1, { All } } 32 | window_begin_time_relative 0.000000000000 33 | window_open true 34 | window_drawmode 1 35 | window_drawmode_rows 1 36 | window_pixel_size 1 37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Changed value}, {thread, Last Evt Val}, 
{node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 39 | window_filter_module evt_type 1 9200050 40 | 41 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/graph_and_scheduling/nb_ready_tasks.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW # ready tasks in queue > 7 | ################################################################################ 8 | window_name # ready tasks in queue 9 | window_type single 10 | window_id 1 11 | window_position_x 630 12 | window_position_y 277 13 | window_width 826 14 | window_height 398 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode false 18 | window_color_mode window_in_null_gradient_mode 19 | window_logical_filtered true 20 | window_physical_filtered false 21 | window_comm_fromto true 22 | window_comm_tagsize true 23 | window_comm_typeval true 24 | window_units Microseconds 25 | window_maximum_y 158.000000000000 26 | window_minimum_y 1.000000000000 27 | window_compute_y_max true 28 | window_level task 29 | window_scale_relative 1.000000000000 30 | window_end_time_relative 1.000000000000 31 | window_object appl { 1, { All } } 32 | window_begin_time_relative 0.000000000000 33 | window_open true 34 | window_drawmode 1 35 | window_drawmode_rows 1 36 | window_pixel_size 1 37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Changed 
value}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 39 | window_filter_module evt_type 1 9200022 40 | 41 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/graph_and_scheduling/nb_tasks_in_graph.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Number of tasks in graph > 7 | ################################################################################ 8 | window_name Number of tasks in graph 9 | window_type single 10 | window_id 1 11 | window_position_x 648 12 | window_position_y 118 13 | window_width 623 14 | window_height 111 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode false 18 | window_color_mode window_in_null_gradient_mode 19 | window_logical_filtered true 20 | window_physical_filtered false 21 | window_comm_fromto true 22 | window_comm_tagsize true 23 | window_comm_typeval true 24 | window_units Nanoseconds 25 | window_maximum_y 2714.000000000000 26 | window_minimum_y 1.000000000000 27 | window_compute_y_max false 28 | window_level task 29 | window_scale_relative 1.000000000000 30 | window_end_time_relative 1.000000000000 31 | window_object appl { 1, { All } } 32 | window_begin_time_relative 0.000000000000 33 | window_open true 34 | window_drawmode 1 35 | window_drawmode_rows 1 36 | window_pixel_size 1 37 | window_selected_functions { 14, { {cpu, 
Active Thd}, {appl, Adding}, {task, Changed value}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 39 | window_filter_module evt_type 1 9200023 40 | 41 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/graph_and_scheduling/versioning_sched.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | Versioning scheduler decisions 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Versioning Scheduler > 7 | ################################################################################ 8 | window_name Versioning Scheduler 9 | window_type single 10 | window_id 1 11 | window_position_x 1916 12 | window_position_y 397 13 | window_width 958 14 | window_height 115 15 | window_comm_lines_enabled false 16 | window_flags_enabled true 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Nanoseconds 24 | window_maximum_y 0.000000000000 25 | window_minimum_y 0.000000000000 26 | window_compute_y_max true 27 | window_level thread 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open true 33 | window_drawmode 1 34 | window_drawmode_rows 1 35 | window_pixel_size 1 36 | window_selected_functions { 14, { 
{cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, =} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | window_filter_module evt_type 1 9200035 39 | window_filter_module evt_value 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 40 | 41 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/opencl/opencl_runtime.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | Inside OpenCL runtime 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW OpenCL Runtime > 7 | ################################################################################ 8 | window_name OpenCL Runtime 9 | window_type single 10 | window_id 1 11 | window_position_x 518 12 | window_position_y 238 13 | window_width 600 14 | window_height 115 15 | window_comm_lines_enabled true 16 | window_flags_enabled true 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 16.000000000000 25 | window_minimum_y 1.000000000000 26 | window_compute_y_max false 27 | window_level thread 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open true 33 | window_drawmode 1 34 | window_drawmode_rows 1 35 | 
window_pixel_size 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, None}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | window_filter_module tag_msg 1 1 39 | window_filter_module evt_type 1 9200039 40 | 41 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/runtime/async_thread_state.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Async thread state > 7 | ################################################################################ 8 | window_name Async thread state 9 | window_type single 10 | window_id 1 11 | window_position_x 1870 12 | window_position_y 297 13 | window_width 641 14 | window_height 115 15 | window_comm_lines_enabled true 16 | window_flags_enabled true 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 7.000000000000 25 | window_minimum_y 2.000000000000 26 | window_compute_y_max true 27 | window_level thread 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open true 33 | window_drawmode 1 34 | window_drawmode_rows 1 35 | 
window_pixel_size 1 36 | window_labels_to_draw 1 37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, None}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, Stacked Val}, {topcompose2, As Is} } } 39 | window_filter_module tag_msg 1 1 40 | window_filter_module evt_type 1 9200052 41 | 42 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/runtime/nanos_API.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW NANOS API > 7 | ################################################################################ 8 | window_name NANOS API 9 | window_type single 10 | window_id 1 11 | window_position_x 2081 12 | window_position_y 389 13 | window_width 641 14 | window_height 192 15 | window_comm_lines_enabled false 16 | window_flags_enabled true 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Nanoseconds 24 | window_maximum_y 43.000000000000 25 | window_minimum_y 0.000000000000 26 | window_compute_y_max false 27 | window_level thread 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open true 33 | window_drawmode 1 34 | window_drawmode_rows 1 35 | 
window_pixel_size 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | window_filter_module evt_type 1 9200001 39 | 40 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/runtime/nanos_locks.cfg: -------------------------------------------------------------------------------- 1 | #ParaverCFG 2 | ConfigFile.Version: 3.4 3 | ConfigFile.NumWindows: 1 4 | 5 | 6 | ################################################################################ 7 | < NEW DISPLAYING WINDOW Nanos lock > 8 | ################################################################################ 9 | window_name Nanos lock 10 | window_type single 11 | window_id 1 12 | window_position_x 414 13 | window_position_y 521 14 | window_width 641 15 | window_height 115 16 | window_comm_lines_enabled false 17 | window_flags_enabled true 18 | window_noncolor_mode true 19 | window_logical_filtered true 20 | window_physical_filtered false 21 | window_comm_fromto true 22 | window_comm_tagsize true 23 | window_comm_typeval true 24 | window_units Microseconds 25 | window_maximum_y 1.000000000000 26 | window_minimum_y 1.000000000000 27 | window_compute_y_max false 28 | window_level thread 29 | window_scale_relative 1.000000000000 30 | window_end_time_relative 1.000000000000 31 | window_object appl { 1, { All } } 32 | window_begin_time_relative 0.000000000000 33 | window_open true 34 | window_drawmode 1 35 | window_drawmode_rows 1 36 | window_pixel_size 1 37 | 
window_labels_to_draw 1 38 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 39 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, Sign}, {topcompose2, As Is} } } 40 | window_filter_module tag_msg 1 1 41 | window_filter_module evt_type 1 9200017 42 | 43 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/runtime/num_threads.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Number of Threads > 7 | ################################################################################ 8 | window_name Number of Threads 9 | window_type single 10 | window_id 1 11 | window_position_x 480 12 | window_position_y 343 13 | window_width 735 14 | window_height 248 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 24.000000000000 25 | window_minimum_y 0.000000000000 26 | window_compute_y_max false 27 | window_level task 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open true 33 | window_drawmode 1 34 | window_drawmode_rows 1 35 | 
window_pixel_size 1 36 | window_labels_to_draw 1 37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Changed value}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 39 | window_filter_module tag_msg 1 1 40 | window_filter_module evt_type 1 9200041 41 | 42 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/runtime/thread_cpuid.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW Thread cpuid > 7 | ################################################################################ 8 | window_name Thread cpuid 9 | window_type single 10 | window_id 1 11 | window_position_x 480 12 | window_position_y 638 13 | window_width 735 14 | window_height 248 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 24.000000000000 25 | window_minimum_y 0.000000000000 26 | window_compute_y_max false 27 | window_level thread 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open true 33 | window_drawmode 1 34 | 
window_drawmode_rows 1 35 | window_pixel_size 1 36 | window_labels_to_draw 1 37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 39 | window_filter_module tag_msg 1 1 40 | window_filter_module evt_type 1 9200042 41 | 42 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/runtime/thread_numa_node.cfg: -------------------------------------------------------------------------------- 1 | #ParaverCFG 2 | ConfigFile.Version: 3.4 3 | ConfigFile.NumWindows: 1 4 | 5 | 6 | ################################################################################ 7 | < NEW DISPLAYING WINDOW Thread NUMA node > 8 | ################################################################################ 9 | window_name Thread NUMA node 10 | window_type single 11 | window_id 1 12 | window_position_x 480 13 | window_position_y 638 14 | window_width 735 15 | window_height 248 16 | window_comm_lines_enabled false 17 | window_flags_enabled false 18 | window_noncolor_mode true 19 | window_logical_filtered true 20 | window_physical_filtered false 21 | window_comm_fromto true 22 | window_comm_tagsize true 23 | window_comm_typeval true 24 | window_units Microseconds 25 | window_maximum_y 24.000000000000 26 | window_minimum_y 0.000000000000 27 | window_compute_y_max false 28 | window_level thread 29 | window_scale_relative 1.000000000000 30 | window_end_time_relative 1.000000000000 31 | window_object appl { 1, { All } } 32 | window_begin_time_relative 0.000000000000 33 | window_open 
true 34 | window_drawmode 1 35 | window_drawmode_rows 1 36 | window_pixel_size 1 37 | window_labels_to_draw 1 38 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 39 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 40 | window_filter_module tag_msg 1 1 41 | window_filter_module evt_type 1 9200064 42 | 43 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/runtime/thread_state.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW thread state > 7 | ################################################################################ 8 | window_name thread state 9 | window_type single 10 | window_id 1 11 | window_position_x 425 12 | window_position_y 35 13 | window_width 641 14 | window_height 115 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 15.000000000000 25 | window_minimum_y 0.000000000000 26 | window_compute_y_max false 27 | window_level thread 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | 
window_open true 33 | window_drawmode 1 34 | window_drawmode_rows 1 35 | window_pixel_size 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } } 38 | window_filter_module tag_msg 1 1 39 | window_filter_module evt_type 1 9000000 40 | 41 | -------------------------------------------------------------------------------- /paraver-cfgs/ompss/tasks/in_task.cfg: -------------------------------------------------------------------------------- 1 | ConfigFile.Version: 3.4 2 | ConfigFile.NumWindows: 1 3 | 4 | 5 | ################################################################################ 6 | < NEW DISPLAYING WINDOW In task > 7 | ################################################################################ 8 | window_name In task 9 | window_type single 10 | window_id 1 11 | window_position_x 501 12 | window_position_y 47 13 | window_width 641 14 | window_height 195 15 | window_comm_lines_enabled false 16 | window_flags_enabled false 17 | window_noncolor_mode true 18 | window_logical_filtered true 19 | window_physical_filtered false 20 | window_comm_fromto true 21 | window_comm_tagsize true 22 | window_comm_typeval true 23 | window_units Microseconds 24 | window_maximum_y 6.000000000000 25 | window_minimum_y 1.000000000000 26 | window_compute_y_max false 27 | window_level thread 28 | window_scale_relative 1.000000000000 29 | window_end_time_relative 1.000000000000 30 | window_object appl { 1, { All } } 31 | window_begin_time_relative 0.000000000000 32 | window_open true 33 | window_drawmode 
1 34 | window_drawmode_rows 1 35 | window_pixel_size 1 36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } } 37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, Sign}, {topcompose2, As Is} } } 38 | window_filter_module evt_type 1 9200011 39 | 40 | --------------------------------------------------------------------------------