├── .static
└── custom.css
├── .templates
└── layout.html
├── 00-introduction
├── README.rst
├── blackscholes_solution.png
└── perlin_noise_solution.png
├── 01-examples
├── INTRO.rst
├── README.rst
├── array-sum-fortran
│ ├── .config
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ └── array_sum.f90
├── cholesky
│ ├── .config
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ ├── cholesky.c
│ └── cholesky.h
├── stream-barr
│ ├── .config
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ └── stream-barr.c
└── stream-deps
    ├── .config
    │   ├── multirun.sh
    │   └── run-once.sh
    ├── Makefile
    ├── README.rst
    └── stream-deps.c
├── 02-beginners
├── README.rst
├── dot-product
│ ├── .config
│ │ ├── dot-product.c
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ └── dot-product.c
├── matmul
│ ├── .config
│ │ ├── matmul.c
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ └── matmul.c
└── multisort
    ├── .config
    │   ├── multirun.sh
    │   ├── multisort.c
    │   └── run-once.sh
    ├── Makefile
    ├── README.rst
    └── multisort.c
├── 03-gpu-devices
├── INTRO.rst
├── README.rst
├── cholesky-cuda
│ ├── .config
│ │ ├── cholesky_hyb.c
│ │ ├── cuda_potrf.h
│ │ ├── multirun.sh
│ │ ├── run-once.sh
│ │ ├── run.sh
│ │ └── run_trace.sh
│ ├── Makefile
│ ├── README.rst
│ ├── cholesky_hyb.c
│ ├── cuda_potrf.cu
│ └── cuda_potrf.h
├── krist-cuda
│ ├── .config
│ │ ├── krist.h
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ ├── clocks.c
│ ├── kernel.cu
│ ├── krist.c
│ └── krist.h
├── krist-opencl
│ ├── .config
│ │ ├── krist.h
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ ├── clocks.c
│ ├── kernel.cl
│ ├── krist.c
│ └── krist.h
├── matmul-cuda
│ ├── .config
│ │ ├── kernel.h
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ ├── cclock.c
│ ├── check.c
│ ├── driver.c
│ ├── driver.h
│ ├── gendat.c
│ ├── kernel.cu
│ ├── kernel.h
│ ├── matmul.c
│ └── prtspeed.c
├── matmul-opencl
│ ├── .config
│ │ ├── kernel.h
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ ├── cclock.c
│ ├── check.c
│ ├── driver.c
│ ├── driver.h
│ ├── gendat.c
│ ├── kernel.cl
│ ├── kernel.h
│ ├── matmul.c
│ └── prtspeed.c
├── nbody-cuda
│ ├── .config
│ │ ├── kernel.h
│ │ ├── multirun.sh
│ │ ├── nbody.h
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ ├── kernel.cu
│ ├── kernel.h
│ ├── nbody.c
│ ├── nbody.h
│ ├── nbody_input-16384.in
│ └── nbody_out-ref.xyz
├── nbody-opencl
│ ├── .config
│ │ ├── kernel.h
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ ├── kernel.c
│ ├── kernel.cl
│ ├── kernel.h
│ ├── nbody.c
│ ├── nbody.h
│ ├── nbody_input-16384.in
│ └── nbody_out-ref.xyz
├── saxpy-cuda
│ ├── .config
│ │ ├── kernel.h
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ ├── kernel.cu
│ ├── kernel.h
│ └── saxpy.c
└── saxpy-opencl
│ ├── .config
│ ├── kernel.h
│ ├── multirun.sh
│ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ ├── kernel.cl
│ ├── kernel.h
│ └── saxpy.c
├── 04-mpi+ompss
├── README.rst
├── heat
│ ├── .config
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ ├── heat-mpi-ompss.c
│ ├── heat.h
│ ├── misc.c
│ ├── solver-mpi-ompss.c
│ └── test.dat
└── matmul
│ ├── .config
│ ├── mm-image.png
│ ├── multirun.sh
│ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ ├── bsize.h
│ ├── cclock.c
│ ├── check.c
│ ├── driver.c
│ ├── gendat.c
│ ├── layouts.c
│ ├── layouts.h
│ ├── matmul.c
│ ├── matmul.h
│ ├── prthead.c
│ ├── prtspeed.c
│ └── test.in
├── 05-ompss+dlb
├── README.rst
├── lub
│ ├── .config
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── LUB.c
│ ├── Makefile
│ └── README.rst
├── lulesh
│ ├── .config
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README
│ ├── README.rst
│ ├── lulesh-comm.cc
│ ├── lulesh-init.cc
│ ├── lulesh-util.cc
│ ├── lulesh-viz.cc
│ ├── lulesh.cc
│ ├── lulesh.h
│ └── lulesh_tuple.h
├── pils-multiapp
│ ├── .config
│ │ ├── multirun.sh
│ │ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ ├── extrae-multiapp.xml
│ ├── input1
│ ├── input2
│ ├── ompss_pils.c
│ └── trace-multiapp.sh
└── pils
│ ├── .config
│ ├── multirun.sh
│ └── run-once.sh
│ ├── Makefile
│ ├── README.rst
│ └── mpi_ompss_pils.c
├── README.rst
├── common-files
├── Makefile
├── configure_VirtualBox
├── configure_default
├── configure_mn4
├── configure_nord3
├── configure_nvidia
├── extrae.xml
├── graph.sh
├── multirun.sh
├── paraver
├── run-once.sh
├── sched-job-mpi_mn4
├── sched-job-mpi_nord3
├── sched-job-mpi_nvidia
├── sched-job-smp_mn4
├── sched-job-smp_nord3
├── sched-job-smp_nvidia
├── trace-mpi.sh
└── trace.sh
├── configure.sh
└── paraver-cfgs
├── cluster
└── network_transfers_and_bw.cfg
├── general
├── 2dp_WhereMyNextTaskWasGenerated.cfg
├── analysis
│ ├── 2dh_L1Dmr.cfg
│ ├── 2dh_ipc.cfg
│ ├── 2dh_ui.cfg
│ ├── 2dh_useful_MIPS.cfg
│ ├── 2dh_useful_instr.cfg
│ ├── 2dh_usefultime.cfg
│ ├── 2dp_uf.cfg
│ ├── 2dp_useful.cfg
│ ├── 3dh_duration_uf.cfg
│ ├── 3dh_instr_uf.cfg
│ ├── 3dh_ipc_uf.cfg
│ ├── Load_balance.cfg
│ ├── Sup_model_data.cfg
│ ├── advanced
│ │ ├── 2dc_cyc_ipc.cfg
│ │ ├── 2dc_ud_ipc.cfg
│ │ ├── 2dc_ufduration_IPC.cfg
│ │ ├── 2dp_percentMPI_in_uf.cfg
│ │ ├── 2dp_ufExcludingMPI.cfg
│ │ ├── 3dc_instr_ipc_useful.cfg
│ │ ├── 3dc_usefulInstr_cycles_ipc.cfg
│ │ ├── 3dh_ud_u.cfg
│ │ ├── 3dh_usefulIPC_uf.cfg
│ │ ├── 3dp_ipc_useful.cfg
│ │ ├── Sup_model_data.2.cfg
│ │ ├── avg_procs.cfg
│ │ └── load_balance_for_specific_uf.cfg
│ └── efficiency.cfg
├── link_to_source
│ └── by_call_stack
│ │ ├── 2dp_MPIcallertime.cfg
│ │ ├── 2dp_MPItime_line.cfg
│ │ ├── 2dp_code_after_line.cfg
│ │ ├── 2dp_line_call.cfg
│ │ ├── 3dh_duration_MPIcallline.cfg
│ │ ├── 3dp_MPItime_line.cfg
│ │ ├── MPI_caller.cfg
│ │ ├── MPI_caller_line.cfg
│ │ ├── MPI_callers_4_levels.cfg
│ │ └── nbcalls_toMPI_per_uf.cfg
├── sanity_checks
│ ├── 2dh_cycperus.cfg
│ ├── Equivalent_CPUs.cfg
│ ├── Events_too_close.cfg
│ ├── flushing.cfg
│ ├── preempted_time_in_useful.cfg
│ └── preempted_time_outside_useful.cfg
└── views
│ ├── User_function_excl_MPI.cfg
│ ├── instantaneous_parallelism.cfg
│ ├── not_running_duration.cfg
│ ├── not_useful.cfg
│ ├── one.cfg
│ ├── state_as_is.cfg
│ ├── state_duration.cfg
│ ├── useful.cfg
│ ├── useful_duration.cfg
│ ├── user_calls.cfg
│ ├── user_calls_duration.cfg
│ ├── user_function_nesting_level.cfg
│ ├── user_functions.cfg
│ └── user_functions_duration.cfg
├── hwc
├── active_set.cfg
└── papi
│ ├── architecture
│ ├── 2dh_L1D_total_misses.cfg
│ ├── 2dh_L2_total_misses.cfg
│ ├── 2dh_TLB_total_misses.cfg
│ ├── 2dh_preemption_time.cfg
│ ├── 3dh_IPC_state.cfg
│ ├── 3dh_percentpreempted_useful.cfg
│ ├── 3dh_preempted_useful.cfg
│ ├── BytesPerFlop.cfg
│ ├── BytesPerInstr.cfg
│ ├── JS21_relative_preempted_time.cfg
│ ├── L1D_misses.cfg
│ ├── L1D_missratio.cfg
│ ├── L1_2_L2_miss_ratio.cfg
│ ├── L1_Load_misses.cfg
│ ├── L1_Load_missratio.cfg
│ ├── L1_store_misses.cfg
│ ├── L2D_Total_miss_ratio.cfg
│ ├── L2D_miss_ratio.cfg
│ ├── L2D_miss_ratio_v2.cfg
│ ├── L2D_misses.cfg
│ ├── L2_2_TLB_miss_ratio.cfg
│ ├── L3D_miss_ratio.cfg
│ ├── L3D_misses.cfg
│ ├── L3_misses.cfg
│ ├── Preempted_time.cfg
│ ├── Relative_preempted_time.cfg
│ ├── TLB_misses.cfg
│ ├── TLB_missratio.cfg
│ ├── loaded_bytes.cfg
│ ├── relative_preemption_time.cfg
│ ├── useful_loadad_bytes.cfg
│ └── useful_loaded_bytes.cfg
│ ├── models
│ └── 3D_duration.cfg
│ ├── mx_counters
│ ├── nb_medium_msgs_sent.cfg
│ ├── nb_rndv_msgs_sent.cfg
│ ├── nb_small_msgs_sent.cfg
│ └── route_dispersion.cfg
│ ├── performance
│ ├── 2dh_ipc_frequency.cfg
│ ├── 3dh_cycles_per_us.cfg
│ ├── CPI.cfg
│ ├── IPC.cfg
│ ├── L2Dmisses_rate.cfg
│ ├── MFLOPS.cfg
│ ├── MFMAS.cfg
│ ├── MIPS.cfg
│ ├── MLoadS.cfg
│ ├── MemBW_pernode.cfg
│ ├── MemBW_perprocess.cfg
│ ├── NoIssue_cycles_per_us.cfg
│ ├── cycles_per_us.cfg
│ ├── cycles_per_us_decentInterval.cfg
│ ├── useful_MIPS.cfg
│ └── useful_cycus.cfg
│ └── program
│ ├── 3dh_instr_state.cfg
│ ├── Computation_intensity.cfg
│ ├── Load2store_ratio.cfg
│ ├── Load_stores.cfg
│ ├── Loads.cfg
│ ├── Loads_to_FMA_ratio.cfg
│ ├── Stores.cfg
│ ├── branch_mix.cfg
│ ├── flops.cfg
│ ├── instructions.cfg
│ └── useful_instructions.cfg
├── mpi
├── analysis
│ ├── 2dc_connectivity_bw.cfg
│ ├── 2dh_bytes_sent.cfg
│ ├── 2dp_MPI_activity.cfg
│ ├── 2dp_connectivity.cfg
│ ├── 2dp_mpi_stats.cfg
│ ├── 3dc_connectivity_caller.cfg
│ ├── 3dh_duration_MPI_activity.cfg
│ ├── 3dh_duration_per_call.cfg
│ ├── 3dh_size_call.cfg
│ ├── advanced
│ │ ├── 2dc_bytessent_totbytessent.cfg
│ │ ├── 2dc_connectivity_snd_bytes.cfg
│ │ ├── 2dc_e2ebw_bytes.cfg
│ │ ├── 2dh_comm_phase_duration.cfg
│ │ └── 2dp_totbytessent.cfg
│ ├── avg_netbw.cfg
│ ├── collectives
│ │ ├── 3dh_duration_collective.cfg
│ │ ├── 3dh_recvsize_collectivecall.cfg
│ │ └── 3dh_sendsize_collectivecall.cfg
│ ├── other
│ │ ├── 3dc_p2p_size_bw_per_call.cfg
│ │ ├── 3dc_size_bw_per_call.cfg
│ │ ├── 3dh_bw_per_call.cfg
│ │ ├── 3dh_duration_per_call.cfg
│ │ ├── Collective_LateArrivers.cfg
│ │ ├── Collective_stats.cfg
│ │ ├── Collectives_balance.cfg
│ │ ├── CommComp_overlap.cfg
│ │ ├── Correlation_duration_size.cfg
│ │ ├── CostOf_p2pCalls.cfg
│ │ ├── MPIxroutine.cfg
│ │ ├── Specific_collective_analysis.cfg
│ │ ├── System_BW.cfg
│ │ ├── call_duration_histogram.cfg
│ │ └── communication_matrix.cfg
│ └── point2point
│ │ ├── 2d_comm_pattern.cfg
│ │ ├── 2d_comm_pattern_rcv.cfg
│ │ ├── 2d_comm_pattern_snd.cfg
│ │ ├── 2d_comm_pattern_snd_order.cfg
│ │ ├── 2d_costofreceives_per_source.cfg
│ │ ├── 2d_who_comms.cfg
│ │ ├── 2d_who_latesends_to_whom.cfg
│ │ ├── 2dh_p2p_phase_duration.cfg
│ │ ├── 2dh_send_size.cfg
│ │ ├── 2dp_high_bw_process.cfg
│ │ ├── 3dc_msgsize_totbytes.cfg
│ │ ├── 3dc_srbw_bytes.cfg
│ │ ├── 3dh_bw_per_call.cfg
│ │ ├── 3dh_cost_per_call.cfg
│ │ ├── 3dh_msgsize_per_pt2pt_call.cfg
│ │ ├── 3dh_srbw_per_call.cfg
│ │ ├── IProbe_density.cfg
│ │ └── system_bw.cfg
├── sanity_checks
│ ├── 2d_compute_shifts.cfg
│ ├── 2dt_backwards_time.cfg
│ ├── 2dt_brwds_nbbwrds.cfg
│ ├── backward_msgs.cfg
│ ├── cloged_system.cfg
│ ├── duration_backwards_msg.cfg
│ └── src_backwards_msg.cfg
├── scalasca_properties
│ ├── late_receivers.cfg
│ ├── received_from_delayed.cfg
│ ├── receives_from_late_sender.cfg
│ └── receiving_from_latesender.cfg
└── views
│ ├── Activity_duration.cfg
│ ├── Enumeration_of_MPI_calls.cfg
│ ├── InMPI_mem_BW.cfg
│ ├── In_MPI_call.cfg
│ ├── MPI_activity.cfg
│ ├── MPI_bandwidth.cfg
│ ├── MPI_call.cfg
│ ├── MPI_call_density.cfg
│ ├── MPI_call_duration.cfg
│ ├── Outside_MPI.cfg
│ ├── advanced
│ ├── 2d_who_comms.cfg
│ ├── Failed_iprobes.cfg
│ ├── Failed_tests.cfg
│ ├── From_where_mpi_calls.cfg
│ ├── Isend_waits.cfg
│ ├── MPI_Wait_from_Isend.cfg
│ ├── MPI_collectives.cfg
│ ├── MPI_p2p.cfg
│ ├── MPIcall_cost.cfg
│ ├── MPIcall_cost_perbyte.cfg
│ ├── MPIcall_duration.cfg
│ ├── Who_calls_mpi.cfg
│ ├── bytes_arriving.cfg
│ ├── bytes_outgoing.cfg
│ ├── bytes_sr_within_call.cfg
│ ├── bytesperMBS.cfg
│ ├── bytespermsg.cfg
│ ├── in_MPI_call.cfg
│ ├── in_specific_MPI_call.cfg
│ ├── long_MPI_calls.cfg
│ ├── messages_arriving.cfg
│ ├── messages_outgoing.cfg
│ ├── p2p_phase_duration.cfg
│ ├── receive_bandwidth.cfg
│ ├── receive_bandwidth_appl.cfg
│ ├── receive_bandwidth_task.cfg
│ ├── send_bandwidth.cfg
│ ├── send_bandwidth_appl.cfg
│ ├── send_bandwidth_task.cfg
│ ├── specific_MPI_duration.cfg
│ ├── sr_msgs.cfg
│ ├── total_bytes_in_transit.cfg
│ ├── total_sr_bw.cfg
│ ├── total_sr_msgs.cfg
│ ├── total_system_bw.cfg
│ └── typeof_MPI_Wait.cfg
│ ├── collectives
│ ├── MPI_collective_call.cfg
│ ├── advanced
│ │ ├── Broadcast_number.cfg
│ │ └── all2all_number.cfg
│ ├── collective_duration.cfg
│ ├── collective_root.cfg
│ ├── collective_sizes.cfg
│ ├── communicator.cfg
│ ├── enumerate_collectives.cfg
│ ├── nbprocs_in_colective.cfg
│ └── outside_collective.cfg
│ ├── comm_size.cfg
│ ├── communication_phase.cfg
│ ├── communication_phase_duration.cfg
│ ├── in_MPI_activity.cfg
│ ├── msg_sizes.cfg
│ ├── nb_active_processes.cfg
│ ├── nb_in_MPI.cfg
│ ├── node_bandwidth.cfg
│ └── point2point
│ ├── In_MPI_pt2pt_call.cfg
│ ├── In_MPI_reception_call.cfg
│ ├── In_MPI_send_pt2pt_call.cfg
│ ├── MPICall_overhead.cfg
│ ├── MPI_p2p_call.cfg
│ ├── advanced
│ ├── In_long_receptions.cfg
│ ├── bytes_received_at_waits.cfg
│ ├── destination_last_large_send.cfg
│ ├── exclusively_1_direction_transfers.cfg
│ ├── high_bw_process.cfg
│ ├── high_s_r_bandwidth.cfg
│ ├── max_recBW_during_activity.cfg
│ ├── physical_s_r_bandwidth.cfg
│ ├── physical_s_r_msgs.cfg
│ ├── receiving_not_sending.cfg
│ └── sending_not_receiving.cfg
│ ├── destination_of_send.cfg
│ ├── iprobe_misses.cfg
│ ├── iprobe_misses_per_ms.cfg
│ ├── models
│ ├── excess_time.cfg
│ └── linear_model.cfg
│ ├── nb_collective.cfg
│ ├── nbprocs_in_pt2pt.cfg
│ ├── outstanding_sends.cfg
│ ├── p2p_bytes_received.cfg
│ ├── p2p_bytes_sent.cfg
│ ├── p2p_duration.cfg
│ ├── p2p_recv_size.cfg
│ ├── p2p_send_size.cfg
│ ├── p2p_size.cfg
│ ├── s_r_bandwidth.cfg
│ ├── s_r_bytes.cfg
│ ├── s_r_msgs.cfg
│ ├── source_of_reception.cfg
│ ├── to_whom_I_send.cfg
│ ├── total_bw.cfg
│ ├── total_bytes_btw_events.cfg
│ ├── total_msgs_in_transit.cfg
│ └── wait_type.cfg
└── ompss
├── 2d_general.cfg
├── cuda
├── 3dh_duration_CUDAruntime.cfg
├── CUDA_runtime.cfg
├── Non_overlapped_CUDA_Transfer_Direction.cfg
└── cuda_transfers.cfg
├── data_mgmgt
├── 2dh_bw2device.cfg
├── aggregated_bandwidth.cfg
├── bandwidth_per_device.cfg
├── bytes_being_transfered.cfg
├── cache_waiting_for.cfg
├── data_tx.cfg
├── direction_of_data_transfer_nosesiescorrecta.cfg
├── malloc_free_in_device.cfg
└── nb_ongoing_transfers_duda.cfg
├── general.cfg
├── graph_and_scheduling
├── 2dp_order.cfg
├── creating_submitting_task.cfg
├── nb_concurrent_ready.cfg
├── nb_ready_tasks.cfg
├── nb_tasks_in_graph.cfg
└── versioning_sched.cfg
├── opencl
└── opencl_runtime.cfg
├── runtime
├── 2dp_thread_state.cfg
├── 3dh_duration_state.cfg
├── async_thread_state.cfg
├── average_sleep_time.cfg
├── nanos_API.cfg
├── nanos_locks.cfg
├── num_threads.cfg
├── spins_yields.cfg
├── thread_cpuid.cfg
├── thread_numa_node.cfg
├── thread_state.cfg
├── thread_state_with_locks.cfg
└── waiting_task.cfg
├── tasks
├── 2dp_tasks.cfg
├── 3dh_L2Tmr_task.cfg
├── 3dh_L2mr_task.cfg
├── 3dh_duration_task.cfg
├── 3dh_instr_task.cfg
├── 3dh_ipc_task.cfg
├── in_task.cfg
├── task_name_and_location.cfg
├── task_numa_node.cfg
├── task_number.cfg
└── task_priority.cfg
└── worksharing_loops.cfg
/.static/custom.css:
--------------------------------------------------------------------------------
1 | @import url('https://fonts.googleapis.com/css?family=Roboto+Condensed:400,700');
2 | @import url('https://fonts.googleapis.com/css?family=Roboto+Mono:400,700');
3 | /* Monospace font for Sphinx command-line option markup (span.option). */
4 | span.option {
5 | font-family: "Roboto Mono", monospace
6 | }
7 | /* Site-title banner injected before the page body. */
8 | body:before {
9 | content: "Programming Models @ BSC";
10 | background: #6fa5cc;
11 | width: 100%;
12 | display: block;
13 | text-align: center;
14 | height: auto;
15 | overflow: hidden;
16 | color: #000000;
17 | font-size: 48px;
18 | font-style: normal;
19 | font-weight: 600;
20 | line-height: 54px;
21 | padding: 3rem 0 3rem 0;
22 | }
23 | /* On narrow screens stretch the banner over the theme's page margins. */
24 | @media screen and (max-width: 875px) {
25 | body:before {
26 | margin: -20px -30px 20px -30px;
27 | width: calc(100% + 60px);
28 | }
29 | }
30 | /* Fixed-width main document area on wide screens... */
31 | .document {
32 | width: 1040px !important;
33 | }
34 | /* ...and full width on narrow screens. */
35 | @media screen and (max-width: 875px) {
36 | .document {
37 | width: 100% !important;
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/.templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 | {% block rootrellink %}
3 | BSC Programming Models »
4 | {{ super() }}
5 | {% endblock %}
6 |
7 |
--------------------------------------------------------------------------------
/00-introduction/blackscholes_solution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bsc-pm/ompss-ee/edc3f13c8137bae7ca691b15fbd18b70d0765311/00-introduction/blackscholes_solution.png
--------------------------------------------------------------------------------
/00-introduction/perlin_noise_solution.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bsc-pm/ompss-ee/edc3f13c8137bae7ca691b15fbd18b70d0765311/00-introduction/perlin_noise_solution.png
--------------------------------------------------------------------------------
/01-examples/INTRO.rst:
--------------------------------------------------------------------------------
1 | Introduction
2 | ============
3 |
4 | In this section we include several OmpSs applications that are already parallelized (i.e. annotated
5 | with OmpSs directives). Users do not have to change the code, but they are encouraged to experiment
6 | with them. You can also use that source directory to experiment with the different compiler
7 | and runtime options, as well as the different instrumentation plugins provided with your OmpSs
8 | installation.
9 |
10 |
--------------------------------------------------------------------------------
/01-examples/README.rst:
--------------------------------------------------------------------------------
1 | Examples Using OmpSs
2 | ********************
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 | :numbered:
7 |
8 | INTRO.rst
9 | cholesky/README.rst
10 | stream-barr/README.rst
11 | array-sum-fortran/README.rst
12 |
--------------------------------------------------------------------------------
/01-examples/array-sum-fortran/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=array_sum-p
2 | # Split the ;-separated lists below when the for loops word-split them.
3 | export IFS=";"
4 | # Sweep parameters: worker-thread counts, total elements (N), block size (BS).
5 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12"
6 | NSIZES="8388608"
7 | BSIZES="32768"
8 | # Run the performance binary once per (N, BS, thread-count) combination.
9 | for N in $NSIZES; do
10 | for BS in $BSIZES; do
11 | for thread in $THREADS; do
12 | NX_SMP_WORKERS=$thread ./$PROGRAM $N $BS
13 | done
14 | done
15 | done
16 |
--------------------------------------------------------------------------------
/01-examples/array-sum-fortran/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=array_sum-p
2 | N=8388608
3 | BS=32768
4 | # Single run; args: total elements, block size. Thread count is left to the runtime default.
5 | ./$PROGRAM $N $BS
6 |
--------------------------------------------------------------------------------
/01-examples/array-sum-fortran/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=array_sum
2 | # Three build flavours: -p (performance), -i (instrumented), -d (debug).
3 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
4 | # Selects the SMP (non-MPI) job scheduler scripts in common-files.
5 | JOB_SCHED_VERSION=-smp
6 | # Shared build/run infrastructure.
7 | BASE_DIR=../..
8 | include $(BASE_DIR)/common-files/Makefile
9 | # Mercurium Fortran compiler (OmpSs source-to-source).
10 | FC = mfc
11 |
12 | FFLAGS = --ompss --no-copy-deps
13 | FFLAGS_P =
14 | FFLAGS_I = --instrument
15 | FFLAGS_D = --debug
16 | # No extra libraries or include paths are needed.
17 | LIBS =
18 | INCS =
19 |
20 | EXTRA = -O3
21 | # Each flavour compiles the same source with its own flag set.
22 | $(PROGRAM)-p: $(PROGRAM).f90
23 | $(FC) $(GFLAGS) $(FFLAGS) $(FFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
24 |
25 | $(PROGRAM)-i: $(PROGRAM).f90
26 | $(FC) $(GFLAGS) $(FFLAGS) $(FFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
27 |
28 | $(PROGRAM)-d: $(PROGRAM).f90
29 | $(FC) $(GFLAGS) $(FFLAGS) $(FFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
30 |
31 | clean:
32 | rm -f $(FC)_* *.o *~ $(TARGETS)
33 |
34 |
--------------------------------------------------------------------------------
/01-examples/cholesky/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | # Scalability sweep for the cholesky performance binary.
2 | PROGRAM=cholesky-p
3 | # Split the ;-separated lists below when the for loops word-split them.
4 | export IFS=";"
5 | # Sweep parameters: worker-thread counts, matrix size (MS), block size (BS).
6 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12"
7 | MSIZES="2048"
8 | BSIZES="256"
9 | # Trailing 0 is passed straight to the binary (presumably disables result checking — confirm in cholesky.c).
10 | for MS in $MSIZES; do
11 | for BS in $BSIZES; do
12 | for thread in $THREADS; do
13 | NX_SMP_WORKERS=$thread ./$PROGRAM $MS $BS 0
14 | done
15 | done
16 | done
17 |
--------------------------------------------------------------------------------
/01-examples/cholesky/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | # Single 4-thread run of the cholesky performance binary.
2 | PROGRAM=cholesky-p
3 |
4 | export NX_SMP_WORKERS=4
5 | # args: matrix size, block size, trailing flag (presumably enables result checking — confirm in cholesky.c).
6 | ./$PROGRAM 4096 512 1
7 |
8 |
--------------------------------------------------------------------------------
/01-examples/cholesky/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=cholesky
2 | # Three build flavours: -p (performance), -i (instrumented), -d (debug).
3 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
4 | # Selects the SMP (non-MPI) job scheduler scripts in common-files.
5 | JOB_SCHED_VERSION=-smp
6 | # Shared build/run infrastructure.
7 | BASE_DIR=../..
8 | include $(BASE_DIR)/common-files/Makefile
9 | # Mercurium C compiler (OmpSs source-to-source).
10 | CC = mcc
11 |
12 | CFLAGS = --ompss
13 | CFLAGS_P =
14 | CFLAGS_I = --instrument
15 | CFLAGS_D = --debug
16 | # Link against Intel MKL; MKL_LIB_DIR/MKL_INC_DIR/MKL_CHECK presumably come from common-files — confirm there.
17 | LIBS = --Wl,-L$(MKL_LIB_DIR) -lmkl_sequential -lmkl_core -lmkl_rt -lpthread
18 | INCS = -I$(MKL_INC_DIR)
19 |
20 | EXTRA = -std=c99 -O3 -Wall -Wno-unused
21 | # Each flavour compiles the same source with its own flag set.
22 | $(PROGRAM)-p: $(PROGRAM).c $(MKL_CHECK)
23 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
24 |
25 | $(PROGRAM)-i: $(PROGRAM).c $(MKL_CHECK)
26 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
27 |
28 | $(PROGRAM)-d: $(PROGRAM).c $(MKL_CHECK)
29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
30 |
31 | clean:
32 | rm -f $(CC)_* *.o *~ $(TARGETS)
33 |
34 |
--------------------------------------------------------------------------------
/01-examples/stream-barr/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=stream-barr-p
2 | # Split the ;-separated THREADS list when the for loop word-splits it.
3 | export IFS=";"
4 | # Worker-thread counts to sweep.
5 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12"
6 | # NX_GPUS=0 keeps the run on SMP workers only.
7 | for thread in $THREADS; do
8 | NX_GPUS=0 NX_SMP_WORKERS=$thread ./$PROGRAM
9 | done
10 |
--------------------------------------------------------------------------------
/01-examples/stream-barr/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=stream-barr-p
2 | # Single run with one SMP worker thread.
3 | export NX_SMP_WORKERS=1
4 |
5 | ./$PROGRAM
6 |
7 |
--------------------------------------------------------------------------------
/01-examples/stream-barr/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=stream-barr
2 | # Three build flavours: -p (performance), -i (instrumented), -d (debug).
3 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
4 | # Selects the SMP (non-MPI) job scheduler scripts in common-files.
5 | JOB_SCHED_VERSION=-smp
6 | # Shared build/run infrastructure.
7 | BASE_DIR=../..
8 | include $(BASE_DIR)/common-files/Makefile
9 | # Mercurium C compiler (OmpSs source-to-source).
10 | CC = mcc
11 |
12 | CFLAGS = --ompss
13 | CFLAGS_P =
14 | CFLAGS_I = --instrument
15 | CFLAGS_D = --debug
16 | # No extra libraries or include paths are needed.
17 | LIBS =
18 | INCS =
19 | # -mcmodel=large: presumably needed for the benchmark's large statically allocated arrays — confirm in stream-barr.c.
20 | EXTRA = -std=c99 -O3 -Wall -Wno-unused -mcmodel=large
21 | # Each flavour compiles the same source with its own flag set.
22 | $(PROGRAM)-p: $(PROGRAM).c
23 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
24 |
25 | $(PROGRAM)-i: $(PROGRAM).c
26 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
27 |
28 | $(PROGRAM)-d: $(PROGRAM).c
29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
30 |
31 | clean:
32 | rm -f $(CC)_* *.o *~ $(TARGETS)
33 |
34 |
34 |
--------------------------------------------------------------------------------
/01-examples/stream-barr/README.rst:
--------------------------------------------------------------------------------
1 | Stream Benchmark
2 | ----------------
3 |
4 | The stream benchmark is part of the HPC Challenge benchmarks (http://icl.cs.utk.edu/hpcc/) and here
5 | we present two versions: one that inserts barriers and another without barriers. The behavior of
6 | version with barriers resembles the OpenMP version, where the different functions (Copy, Scale, ...)
7 | are executed one after another for the whole array while in the version without barriers, functions
8 | that operate on one part of the array are interleaved and the OmpSs runtime keeps the correctness
9 | by means of the detection of data-dependences.
10 |
11 | .. note::
12 | You can download this code by visiting the url http://pm.bsc.es *OmpSs Examples and Exercises*'s
13 | (code) link. The Stream benchmark is included inside the *01-examples*'s directory.
14 |
--------------------------------------------------------------------------------
/01-examples/stream-deps/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=stream-deps-p
2 | # Split the ;-separated THREADS list when the for loop word-splits it.
3 | export IFS=";"
4 | # Worker-thread counts to sweep.
5 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12"
6 | # NX_GPUS=0 keeps the run on SMP workers only.
7 | for thread in $THREADS; do
8 | NX_GPUS=0 NX_SMP_WORKERS=$thread ./$PROGRAM
9 | done
10 |
--------------------------------------------------------------------------------
/01-examples/stream-deps/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=stream-deps-p
2 | # Single run with one SMP worker thread.
3 | export NX_SMP_WORKERS=1
4 |
5 | ./$PROGRAM
6 |
--------------------------------------------------------------------------------
/01-examples/stream-deps/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=stream-deps
2 | # Three build flavours: -p (performance), -i (instrumented), -d (debug).
3 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
4 | # Selects the SMP (non-MPI) job scheduler scripts in common-files.
5 | JOB_SCHED_VERSION=-smp
6 | # Shared build/run infrastructure.
7 | BASE_DIR=../..
8 | include $(BASE_DIR)/common-files/Makefile
9 | # Mercurium C compiler (OmpSs source-to-source).
10 | CC = mcc
11 |
12 | CFLAGS = --ompss
13 | CFLAGS_P =
14 | CFLAGS_I = --instrument
15 | CFLAGS_D = --debug
16 | # No extra libraries or include paths are needed.
17 | LIBS =
18 | INCS =
19 | # -mcmodel=large: presumably needed for the benchmark's large statically allocated arrays — confirm in stream-deps.c.
20 | EXTRA = -std=c99 -O3 -Wall -Wno-unused -mcmodel=large
21 | # Each flavour compiles the same source with its own flag set.
22 | $(PROGRAM)-p: $(PROGRAM).c
23 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
24 |
25 | $(PROGRAM)-i: $(PROGRAM).c
26 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
27 |
28 | $(PROGRAM)-d: $(PROGRAM).c
29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
30 |
31 | clean:
32 | rm -f $(CC)_* *.o *~ $(TARGETS)
33 |
34 |
--------------------------------------------------------------------------------
/01-examples/stream-deps/README.rst:
--------------------------------------------------------------------------------
1 | ../stream-barr/README.rst
--------------------------------------------------------------------------------
/02-beginners/README.rst:
--------------------------------------------------------------------------------
1 | Beginners Exercises
2 | *******************
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 | :numbered:
7 |
8 | matmul/README.rst
9 | dot-product/README.rst
10 | multisort/README.rst
11 |
--------------------------------------------------------------------------------
/02-beginners/dot-product/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=dot-product-p
2 | # Split the ;-separated lists below when the for loops word-split them.
3 | export IFS=";"
4 | # Sweep parameters: worker-thread counts, vector size (MS), block size (BS).
5 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12"
6 | MSIZE="8192"
7 | BSIZE="128"
8 | # NX_GPUS=0 keeps the run on SMP workers only; sweep every combination.
9 | for MS in $MSIZE; do
10 | for BS in $BSIZE; do
11 | for thread in $THREADS; do
12 | NX_GPUS=0 NX_SMP_WORKERS=$thread ./$PROGRAM $MS $BS
13 | done
14 | done
15 | done
16 |
--------------------------------------------------------------------------------
/02-beginners/dot-product/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=dot-product-p
2 | # Single run with 4 SMP worker threads.
3 | export NX_SMP_WORKERS=4
4 | # args: vector size, block size.
5 | ./$PROGRAM 8192 128
6 |
7 |
--------------------------------------------------------------------------------
/02-beginners/dot-product/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=dot-product
2 | PREFIX=.
3 | # Three build flavours: -p (performance), -i (instrumented), -d (debug).
4 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
5 | # Selects the SMP (non-MPI) job scheduler scripts in common-files.
6 | JOB_SCHED_VERSION=-smp
7 | # Shared build/run infrastructure.
8 | BASE_DIR=../..
9 | include $(BASE_DIR)/common-files/Makefile
10 | # Mercurium C compiler (OmpSs source-to-source).
11 | CC = mcc
12 |
13 | CFLAGS = --ompss --no-copy-deps
14 | CFLAGS_P =
15 | CFLAGS_I = --instrument
16 | CFLAGS_D = --debug
17 | # No extra libraries or include paths are needed.
18 | LIBS =
19 | INCS =
20 |
21 | EXTRA = -std=c99 -O3 -Wall -Wno-unused
22 | # Sources are taken from $(PREFIX); each flavour uses its own flag set.
23 | $(PROGRAM)-p: $(PREFIX)/$(PROGRAM).c
24 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
25 |
26 | $(PROGRAM)-i: $(PREFIX)/$(PROGRAM).c
27 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
28 |
29 | $(PROGRAM)-d: $(PREFIX)/$(PROGRAM).c
30 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
31 |
32 | clean:
33 | rm -f $(CC)_* *.o *~ $(TARGETS)
34 |
35 |
--------------------------------------------------------------------------------
/02-beginners/matmul/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=matmul-p
2 | # Split the ;-separated lists below when the for loops word-split them.
3 | export IFS=";"
4 | # Sweep parameters: worker-thread counts and problem sizes (passed straight to the binary).
5 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12"
6 | SIZES="16"
7 | # NX_GPUS=0 keeps the run on SMP workers only.
8 | for size in $SIZES; do
9 | for thread in $THREADS; do
10 | NX_GPUS=0 NX_SMP_WORKERS=$thread ./$PROGRAM $size
11 | done
12 | done
13 |
--------------------------------------------------------------------------------
/02-beginners/matmul/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=matmul-p
2 | # Single run with 4 SMP worker threads.
3 | export NX_SMP_WORKERS=4
4 | # arg: problem size (passed straight to the binary).
5 | ./$PROGRAM 16
6 |
7 |
--------------------------------------------------------------------------------
/02-beginners/matmul/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=matmul
2 | PREFIX=.
3 | # Three build flavours: -p (performance), -i (instrumented), -d (debug).
4 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
5 | # Selects the SMP (non-MPI) job scheduler scripts in common-files.
6 | JOB_SCHED_VERSION=-smp
7 | # Shared build/run infrastructure.
8 | BASE_DIR=../..
9 | include $(BASE_DIR)/common-files/Makefile
10 | # Mercurium C compiler (OmpSs source-to-source).
11 | CC = mcc
12 |
13 | CFLAGS = --ompss
14 | CFLAGS_P =
15 | CFLAGS_I = --instrument
16 | CFLAGS_D = --debug
17 | # No extra libraries or include paths are needed.
18 | LIBS =
19 | INCS =
20 |
21 | EXTRA = -std=c99 -O3 -Wall -Wno-unused
22 |
23 | RM = rm -f
24 |
25 | all: $(TARGETS)
26 | # Sources are taken from $(PREFIX); each flavour uses its own flag set.
27 | $(PROGRAM)-p: $(PREFIX)/$(PROGRAM).c
28 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
29 |
30 | $(PROGRAM)-i: $(PREFIX)/$(PROGRAM).c
31 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
32 |
33 | $(PROGRAM)-d: $(PREFIX)/$(PROGRAM).c
34 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
35 |
36 | clean:
37 | $(RM) $(CC)_* *.o *~ $(TARGETS)
38 |
39 |
--------------------------------------------------------------------------------
/02-beginners/matmul/README.rst:
--------------------------------------------------------------------------------
1 | Matrix Multiplication
2 | ---------------------
3 |
4 | .. highlight:: c
5 |
6 | This example performs the multiplication of two matrices (A and B) into a third one (C). Since
7 | the code is not optimized, not very good performance results are expected. Think about how to
8 | parallelize (using OmpSs) the following code found in compute() function::
9 |
10 | for (i = 0; i < DIM; i++)
11 | for (j = 0; j < DIM; j++)
12 | for (k = 0; k < DIM; k++)
13 | matmul ((double *)A[i][k], (double *)B[k][j], (double *)C[i][j], NB);
14 |
15 | This time you are on your own: you have to identify what code must be a task. There are a few
16 | hints that you may consider before doing the exercise:
17 |
18 | * Have a look at the compute function. It is the one that the main procedure calls to perform
19 | the multiplication. As you can see, this algorithm operates on blocks (to increase memory
20 | locality and to parallelize operations on those blocks).
21 | * Now go to the matmul function. As you can see, this function performs the multiplication on
22 | a block level.
23 | * When creating tasks do not forget to ensure that all of them have finished before returning
24 | the result of the matrix multiplication (would it be necessary any synchronization directive
25 | to guarantee that result has been already computed?).
26 |
27 | **Goals of this exercise**
28 |
29 | * Look for candidates to become a task and taskify them
30 | * Include synchronization directives when required
31 | * Check scalability (for different versions), use different runtime options (schedulers,... )
32 | * Get a task dependency graph and/or paraver traces
33 |
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/02-beginners/multisort/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=multisort-p
2 | # Split the ;-separated lists below when the for loops word-split them.
3 | export IFS=";"
4 | # Sweep parameters: worker-thread counts, vector size, sequential sort/merge cutoffs.
5 | THREADS="01;02;03;04;05;06;07;08;09;10;11;12"
6 | VSIZE="65536"
7 | SEQ_SORT="256"
8 | SEQ_MERGE="512"
9 | # NX_GPUS=0 keeps the run on SMP workers only; sweep every combination.
10 | for size in $VSIZE; do
11 | for seq_sort in $SEQ_SORT; do
12 | for seq_merge in $SEQ_MERGE; do
13 | for thread in $THREADS; do
14 | NX_GPUS=0 NX_SMP_WORKERS=$thread ./$PROGRAM $size $seq_sort $seq_merge
15 | done
16 | done
17 | done
18 | done
19 |
--------------------------------------------------------------------------------
/02-beginners/multisort/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=multisort-p
2 |
3 | export NX_SMP_WORKERS=4
4 |
5 | ./$PROGRAM 65536 256 512
6 |
7 |
--------------------------------------------------------------------------------
/02-beginners/multisort/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=multisort
2 | PREFIX=.
3 |
4 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
5 |
6 | JOB_SCHED_VERSION=-smp
7 |
8 | BASE_DIR=../..
9 | include $(BASE_DIR)/common-files/Makefile
10 |
11 | CC = mcc
12 |
13 | CFLAGS = --ompss --no-copy-deps
14 | CFLAGS_P =
15 | CFLAGS_I = --instrument
16 | CFLAGS_D = --debug
17 |
18 | LIBS =
19 | INCS =
20 |
21 | EXTRA = -std=c99 -O3 -Wall -Wno-unused
22 |
23 | RM = rm -f
24 |
25 | all: $(TARGETS)
26 |
27 | $(PROGRAM)-p: $(PREFIX)/$(PROGRAM).c
28 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
29 |
30 | $(PROGRAM)-i: $(PREFIX)/$(PROGRAM).c
31 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
32 |
33 | $(PROGRAM)-d: $(PREFIX)/$(PROGRAM).c
34 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS)
35 |
36 | clean:
37 | $(RM) $(CC)_* *.o *~ $(TARGETS)
38 |
--------------------------------------------------------------------------------
/03-gpu-devices/INTRO.rst:
--------------------------------------------------------------------------------
1 | Introduction
2 | ============
3 |
Almost all the programs in this section are available both in OpenCL and CUDA. From the point of
view of an OmpSs programmer, the only difference between them is the language in which the kernel
is written.
7 |
As OmpSs abstracts the user from doing the work in the host part of the code, both the OpenCL and
CUDA versions have the same syntax. You can do either of the two versions; since they are basically
the same, once you get one of them working, the same steps can be applied to the other version.
11 |
12 |
--------------------------------------------------------------------------------
/03-gpu-devices/README.rst:
--------------------------------------------------------------------------------
1 | GPU Device Exercises
2 | ********************
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 | :numbered:
7 |
8 | INTRO.rst
9 | saxpy-cuda/README.rst
10 | krist-cuda/README.rst
11 | matmul-cuda/README.rst
12 | nbody-cuda/README.rst
13 | cholesky-cuda/README.rst
14 |
--------------------------------------------------------------------------------
/03-gpu-devices/cholesky-cuda/.config/cuda_potrf.h:
--------------------------------------------------------------------------------
1 | #ifndef _CUDA_POTRF_H_
2 | #define _CUDA_POTRF_H_
3 |
4 | #include
5 |
6 | #ifdef __cplusplus
7 | extern "C"{
8 | #endif
9 |
10 | int
11 | cuda_dpotrf(cublasHandle_t handle, char uplo, int n,
12 | double *dA, int ldda, int *info);
13 |
14 | int
15 | cuda_spotrf(cublasHandle_t handle, char uplo, int n,
16 | float *dA, int ldda, int *info);
17 |
18 | #ifdef __cplusplus
19 | }
20 | #endif
21 |
22 | #endif // _CUDA_POTRF_H_
--------------------------------------------------------------------------------
/03-gpu-devices/cholesky-cuda/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=cholesky_hyb-p
2 |
3 | export NX_SMP_WORKERS=1
4 |
5 | export NX_GPUMAXMEM=90
6 |
7 | # Executing the application
8 | for gpus in 1 2 ; do
9 | export NX_GPUS=$gpus
10 | ./$PROGRAM 16384 2048 0
11 | done
12 |
13 |
--------------------------------------------------------------------------------
/03-gpu-devices/cholesky-cuda/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=cholesky_hyb-p
2 |
3 | export NX_SMP_WORKERS=1
4 | export NX_GPUS=2 #change this in order to use more GPUs
5 |
6 | export NX_GPUMAXMEM=90
7 |
8 | # Executing the application
9 | ./$PROGRAM 16384 2048 0
10 |
--------------------------------------------------------------------------------
/03-gpu-devices/cholesky-cuda/.config/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #@ wall_clock_limit = 00:20:00
3 | #@ initialdir = .
4 | #@ error = cholesky_%j.err
5 | #@ output = cholesky_%j.out
6 | #@ total_tasks = 1
7 | #@ cpus_per_task = 12
8 | #@ gpus_per_node = 2
9 |
10 | export LD_LIBRARY_PATH=/opt/compilers/intel/mkl/lib/intel64/:$LD_LIBRARY_PATH
11 | export NX_SMP_WORKERS=1
12 |
13 | for gpus in 1 2 ; do
14 | echo "Number of gpus = $gpus"
15 | export NX_GPUS=$gpus
16 | ./cholesky_hyb 16384 2048 0
17 | echo " "
18 | done
19 |
20 |
--------------------------------------------------------------------------------
/03-gpu-devices/cholesky-cuda/.config/run_trace.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | #@ wall_clock_limit = 00:20:00
3 | #@ initialdir = .
4 | #@ error = cholesky_%j.err
5 | #@ output = cholesky_%j.out
6 | #@ total_tasks = 1
7 | #@ cpus_per_task = 12
8 | #@ gpus_per_node = 2
9 |
10 |
11 |
12 | export NX_INSTRUMENTATION=extrae
13 | export LD_LIBRARY_PATH=/opt/compilers/intel/mkl/lib/intel64/:$LD_LIBRARY_PATH
14 | export EXTRAE_CONFIG_FILE=../../extrae.xml
15 |
16 | for gpus in 1 2 ; do
17 | echo "Number of gpus = $gpus"
18 | export NX_GPUS=$gpus
19 | ./cholesky_hyb 16384 2048 0
20 | echo " "
21 | done
22 |
23 |
--------------------------------------------------------------------------------
/03-gpu-devices/cholesky-cuda/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=cholesky_hyb
2 | KERNEL=cuda_potrf
3 | PREFIX=.
4 |
5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
6 |
7 | JOB_SCHED_VERSION=-smp
8 |
9 | BASE_DIR=../..
10 | include $(BASE_DIR)/common-files/Makefile
11 |
12 | CC = mcc
13 | KC = nvcc
14 |
15 | CFLAGS = --ompss --cuda
16 | CFLAGS_P =
17 | CFLAGS_I = --instrument
18 | CFLAGS_D = --debug
19 |
20 | NVCFLAGS = -O3 -arch=sm_20
21 |
22 |
23 | LIBS = --Wl,-L$(MKL_LIB_DIR) -lmkl_sequential -lmkl_core -lmkl_rt -lpthread
24 | INCS = -I$(PREFIX) -I$(MKL_INC_DIR)
25 |
26 | EXTRA = -O3 -Wall -Wno-unused
27 |
28 | $(PROGRAM)-p: $(PROGRAM)-p.o $(KERNEL).o $(MKL_CHECK)
29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o
30 |
31 | $(PROGRAM)-i: $(PROGRAM)-i.o $(KERNEL).o $(MKL_CHECK)
32 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o
33 |
34 | $(PROGRAM)-d: $(PROGRAM)-d.o $(KERNEL).o $(MKL_CHECK)
35 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o
36 |
37 |
38 | $(PROGRAM)-p.o: $(PREFIX)/$(PROGRAM).c $(PREFIX)/$(KERNEL).h
39 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ -c $<
40 |
41 | $(PROGRAM)-i.o: $(PREFIX)/$(PROGRAM).c $(PREFIX)/$(KERNEL).h
42 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ -c $<
43 |
44 | $(PROGRAM)-d.o: $(PREFIX)/$(PROGRAM).c $(PREFIX)/$(KERNEL).h
45 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ -c $<
46 |
47 |
48 | $(KERNEL).o: $(KERNEL).cu $(PREFIX)/$(KERNEL).h
49 | $(KC) $(NVCFLAGS) $(INCS) -o $@ -c $<
50 |
51 | .c.o:
52 | $(CC) --no-openmp $(EXTRA) -c $<
53 |
54 | clean:
55 | rm -f $(CC)_* *.o *~ $(TARGETS)
56 |
57 |
--------------------------------------------------------------------------------
/03-gpu-devices/cholesky-cuda/README.rst:
--------------------------------------------------------------------------------
1 | Cholesky kernel
2 | ---------------
3 |
This kernel is just like the SMP version found in the examples, but implemented
in CUDA. It uses CUBLAS routines for the ``syrk``, ``trsm`` and ``gemm``
operations, and a CUDA implementation for the ``potrf`` kernel (declared in a
different file).
8 |
9 | Your assignment is to annotate all CUDA tasks in the source code under the
10 | section "TASKS FOR CHOLESKY".
11 |
12 |
--------------------------------------------------------------------------------
/03-gpu-devices/cholesky-cuda/cuda_potrf.h:
--------------------------------------------------------------------------------
1 | #ifndef _CUDA_POTRF_H_
2 | #define _CUDA_POTRF_H_
3 |
4 | #include
5 |
6 | #ifdef __cplusplus
7 | extern "C"{
8 | #endif
9 |
10 | int
11 | cuda_dpotrf(cublasHandle_t handle, char uplo, int n,
12 | double *dA, int ldda, int *info);
13 |
14 | int
15 | cuda_spotrf(cublasHandle_t handle, char uplo, int n,
16 | float *dA, int ldda, int *info);
17 |
18 | #ifdef __cplusplus
19 | }
20 | #endif
21 |
22 | #endif // _CUDA_POTRF_H_
--------------------------------------------------------------------------------
/03-gpu-devices/krist-cuda/.config/krist.h:
--------------------------------------------------------------------------------
1 | #define DIM2_H 4
2 | #define DIM2_A 4
3 | #define DIM2_E 2
4 |
5 | #if DIM2_H == 4
6 | #define TYPE_H float4
7 | #endif
8 | #if DIM2_H == 3
9 | #define TYPE_H float3
10 | #endif
11 |
12 | #if DIM2_A == 4
13 | #define TYPE_A float4
14 | #endif
15 | #if DIM2_A == 3
16 | #define TYPE_A float3
17 | #endif
18 |
19 | #if DIM2_E == 4
20 | #define TYPE_E float4
21 | #endif
22 | #if DIM2_E == 3
23 | #define TYPE_E float3
24 | #endif
25 | #if DIM2_E == 2
26 | #define TYPE_E float2
27 | #endif
28 |
29 | #ifdef __cplusplus
30 | extern "C"
31 | {
32 | #endif
33 |
34 | #pragma omp target device(cuda) copy_deps ndrange(1,nr,128)
35 | #pragma omp task in([NA] a, [NH] h) out([NE] E)
36 | __global__ void cstructfac(int na, int nr, int nc, float f2, int NA,
37 | TYPE_A*a, int NH, TYPE_H* h, int NE, TYPE_E*E);
38 |
39 | #ifdef __cplusplus
40 | }
41 | #endif
42 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-cuda/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=krist-p
2 |
3 | export NX_GPUMAXMEM=90
4 |
5 | export IFS=";"
6 |
7 | GPUS="1"
8 | ATOMS="1000;2000;3000;"
9 | REFLECTIONS="2000"
10 |
11 | for atoms in $ATOMS; do
12 | for reflections in $REFLECTIONS; do
13 | for NX_GPUS in $GPUS; do
14 | ./$PROGRAM $atoms $reflections
15 | done
16 | done
17 | done
18 |
19 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-cuda/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=krist-p
2 |
3 | export NX_GPUMAXMEM=90
4 | export NX_GPUS=1 #change this in order to use more GPUs
5 |
6 | ./$PROGRAM 1000 2000 --serial
7 |
8 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-cuda/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=krist
2 | KERNEL=kernel
3 | PREFIX=.
4 |
5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
6 |
7 | JOB_SCHED_VERSION=-smp
8 |
9 | BASE_DIR=../..
10 | include $(BASE_DIR)/common-files/Makefile
11 |
12 | CC = mcc
13 | KC = nvcc
14 |
15 | CFLAGS = --ompss --cuda
16 | CFLAGS_P =
17 | CFLAGS_I = --instrument
18 | CFLAGS_D = --debug
19 |
20 | NVCFLAGS = -O3
21 |
22 | LIBS =
23 | INCS = -I$(PREFIX)
24 |
25 | EXTRA = -O3 -Wall -Wno-unused
26 |
27 | $(PROGRAM)-p: $(PROGRAM)-p.o $(KERNEL).o clocks.o
28 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o clocks.o
29 |
30 | $(PROGRAM)-i: $(PROGRAM)-i.o $(KERNEL).o clocks.o
31 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o clocks.o
32 |
33 | $(PROGRAM)-d: $(PROGRAM)-d.o $(KERNEL).o clocks.o
34 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o clocks.o
35 |
36 |
37 | $(PROGRAM)-p.o: $(PROGRAM).c $(PREFIX)/krist.h
38 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ -c $<
39 |
40 | $(PROGRAM)-i.o: $(PROGRAM).c $(PREFIX)/krist.h
41 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ -c $<
42 |
43 | $(PROGRAM)-d.o: $(PROGRAM).c $(PREFIX)/krist.h
44 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ -c $<
45 |
46 |
47 | $(KERNEL).o: $(KERNEL).cu $(PREFIX)/krist.h
48 | $(KC) $(INCS) -o $@ -c $<
49 |
50 | .c.o:
51 | $(CC) --no-openmp $(EXTRA) -c $<
52 |
53 | clean:
54 | rm -f $(CC)_* *.o *~ $(TARGETS)
55 |
56 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-cuda/clocks.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "unistd.h"
6 | double cputime() /* aix, xlf */
7 | {
8 | struct tms b;
9 | clock_t r;
10 | times( &b);
11 | r = b.tms_utime + b.tms_stime;
12 | return ( (double) r/(double) sysconf(_SC_CLK_TCK));
13 | }
14 | double CPUTIME() /* cray */
15 | {
16 | return ( cputime());
17 | }
18 | double cputime_() /* g77, gcc */
19 | {
20 | return ( cputime());
21 | }
22 |
23 | double wallclock()
24 | {
25 | struct timeval toot;
26 | //struct timezone prut;
27 | double r;
28 |
29 | //gettimeofday(&toot,&prut);
30 | gettimeofday(&toot, NULL);
31 | r=toot.tv_sec+0.000001*(double)toot.tv_usec;
32 | return(r);
33 | }
34 | double WALLCLOCK()
35 | {
36 | return (wallclock());
37 | }
38 | double wallclock_()
39 | {
40 | return wallclock();
41 | }
42 |
43 | void fortransleep(int *i)
44 | {
45 | sleep(*i);
46 | }
47 |
48 | void FORTRANSLEEP(int *i)
49 | {
50 | sleep(*i);
51 | }
52 |
53 | void fortransleep_(int *i)
54 | {
55 | sleep(*i);
56 | }
57 |
58 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-cuda/kernel.cu:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 |
7 |
8 | __global__ void cstructfac(int na, int nr, int nc, float f2, int NA,
9 | TYPE_A*a, int NH, TYPE_H* h, int NE, TYPE_E*E)
10 | {
11 | __shared__ TYPE_A ashared[(16384-2048)/sizeof(TYPE_A)];
12 | int a_start;
13 |
14 | int i = blockDim.x * blockIdx.x + threadIdx.x;
15 | if (i < nr) E[i].x = E[i].y = 0.0f;
16 |
17 | for (a_start = 0; a_start < na; a_start += nc) {
18 | int a_end = min(a_start + nc, na);
19 | int k = threadIdx.x;
20 | while (k < a_end - a_start) {
21 | ashared[k] = a[k + a_start];
22 | k += blockDim.x;
23 | }
24 |
25 | __syncthreads();
26 |
27 | if (i < nr) {
28 | int j;
29 | float A,B;
30 | const float twopi = 6.28318584f;
31 |
32 | TYPE_H hi = h[i];
33 | A = 0.0f;
34 | B = 0.0f;
35 |
36 | int jmax = a_end - a_start;
37 | for (j=0; j < jmax; j++) {
38 | float A1,B1;
39 | float4 aj = ashared[j];
40 | float arg = twopi*(hi.x*aj.y +
41 | hi.y*aj.z +
42 | hi.z*aj.w);
43 | sincosf(arg, &B1, &A1);
44 | A += aj.x*A1;
45 | B += aj.x*B1;
46 | }
47 | E[i].x += A*f2;
48 | E[i].y += B*f2;
49 | }
50 | __syncthreads();
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-cuda/krist.h:
--------------------------------------------------------------------------------
1 |
2 | #define DIM2_H 4
3 | #define DIM2_A 4
4 | #define DIM2_E 2
5 |
6 | #if DIM2_H == 4
7 | #define TYPE_H float4
8 | #endif
9 | #if DIM2_H == 3
10 | #define TYPE_H float3
11 | #endif
12 | #if DIM2_A == 4
13 | #define TYPE_A float4
14 | #endif
15 | #if DIM2_A == 3
16 | #define TYPE_A float3
17 | #endif
18 | #if DIM2_E == 4
19 | #define TYPE_E float4
20 | #endif
21 | #if DIM2_E == 3
22 | #define TYPE_E float3
23 | #endif
24 | #if DIM2_E == 2
25 | #define TYPE_E float2
26 | #endif
27 |
28 | #ifdef __cplusplus
29 | extern "C"
30 | {
31 | #endif
32 |
33 | #pragma omp target device(cuda) copy_deps ndrange(/*???*/)
34 | #pragma omp task /* in and outs? */
35 | __global__ void cstructfac(int na, int nr, int nc, float f2, int NA,
36 | TYPE_A*a, int NH, TYPE_H* h, int NE, TYPE_E*E);
37 |
38 | #ifdef __cplusplus
39 | }
40 | #endif
41 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-opencl/.config/krist.h:
--------------------------------------------------------------------------------
1 |
2 | #define DIM2_H 4
3 | #define DIM2_A 4
4 | #define DIM2_E 2
5 |
6 | #if DIM2_H == 4
7 | #define TYPE_H float4
8 | #endif
9 | #if DIM2_H == 3
10 | #define TYPE_H float3
11 | #endif
12 | #if DIM2_A == 4
13 | #define TYPE_A float4
14 | #endif
15 | #if DIM2_A == 3
16 | #define TYPE_A float3
17 | #endif
18 | #if DIM2_E == 4
19 | #define TYPE_E float4
20 | #endif
21 | #if DIM2_E == 3
22 | #define TYPE_E float3
23 | #endif
24 | #if DIM2_E == 2
25 | #define TYPE_E float2
26 | #endif
27 |
28 | #ifndef __OPENCL_VERSION__
29 | #pragma omp target device(opencl) copy_deps ndrange(1,nr,128)
30 | #pragma omp task in([NA] a, [NH] h) out([NE] E)
31 | __kernel void cstructfac(int na, int nr, int nc, float f2,
32 | int NA, __global float* a, int NH , __global float* h, int NE, __global float* E);
33 | #endif
34 |
35 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-opencl/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=krist-p
2 |
3 | export NX_OPENCL_MAX_DEVICES=2 #max number of opencl devices (GPUs in this case) to use
4 |
5 | ./$PROGRAM 10000 20000
6 |
7 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-opencl/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=krist-p
2 |
3 | export NX_OPENCL_MAX_DEVICES=2 #max number of opencl devices (GPUs in this case) to use
4 |
5 | ./$PROGRAM 1000 2000
6 |
7 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-opencl/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=krist
2 | KERNEL=kernel
3 | PREFIX=.
4 |
5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
6 |
7 | JOB_SCHED_VERSION=-smp
8 |
9 | BASE_DIR=../..
10 | include $(BASE_DIR)/common-files/Makefile
11 |
12 | CC = mcc
13 |
14 | CFLAGS = --ompss --opencl
15 | CFLAGS_P =
16 | CFLAGS_I = --instrument
17 | CFLAGS_D = --debug
18 |
19 | LIBS =
20 | INCS = -I$(PREFIX) --opencl-build-opts=-I$(PREFIX)
21 |
22 | EXTRA = -O3 -Wall -Wno-unused
23 |
24 | $(PROGRAM)-p: $(PROGRAM).c $(KERNEL).cl clocks.o
25 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl clocks.o
26 |
27 | $(PROGRAM)-i: $(PROGRAM).c $(KERNEL).cl clocks.o
28 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl clocks.o
29 |
30 | $(PROGRAM)-d: $(PROGRAM).c $(KERNEL).cl clocks.o
31 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl clocks.o
32 |
33 | .c.o:
34 | $(CC) --no-openmp $(EXTRA) -c $<
35 |
36 | clean:
37 | rm -f $(CC)_* *.o *~ $(TARGETS)
38 |
39 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-opencl/README.rst:
--------------------------------------------------------------------------------
1 | ../krist-cuda/README.rst
--------------------------------------------------------------------------------
/03-gpu-devices/krist-opencl/clocks.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include "unistd.h"
6 | double cputime() /* aix, xlf */
7 | {
8 | struct tms b;
9 | clock_t r;
10 | times( &b);
11 | r = b.tms_utime + b.tms_stime;
12 | return ( (double) r/(double) sysconf(_SC_CLK_TCK));
13 | }
14 | double CPUTIME() /* cray */
15 | {
16 | return ( cputime());
17 | }
18 | double cputime_() /* g77, gcc */
19 | {
20 | return ( cputime());
21 | }
22 |
23 | double wallclock()
24 | {
25 | struct timeval toot;
26 | //struct timezone prut;
27 | double r;
28 |
29 | //gettimeofday(&toot,&prut);
30 | gettimeofday(&toot, NULL);
31 | r=toot.tv_sec+0.000001*(double)toot.tv_usec;
32 | return(r);
33 | }
34 | double WALLCLOCK()
35 | {
36 | return (wallclock());
37 | }
38 | double wallclock_()
39 | {
40 | return wallclock();
41 | }
42 |
43 | void fortransleep(int *i)
44 | {
45 | sleep(*i);
46 | }
47 |
48 | void FORTRANSLEEP(int *i)
49 | {
50 | sleep(*i);
51 | }
52 |
53 | void fortransleep_(int *i)
54 | {
55 | sleep(*i);
56 | }
57 |
58 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-opencl/kernel.cl:
--------------------------------------------------------------------------------
1 | #include
2 |
3 |
4 | #ifdef cl_khr_fp64
5 | #pragma OPENCL EXTENSION cl_khr_fp64 : enable
6 | #elif defined(cl_amd_fp64)
7 | #pragma OPENCL EXTENSION cl_amd_fp64 : enable
8 | #else
9 | #error "Double precision floating point not supported by OpenCL implementation."
10 | #endif
11 |
12 | __kernel void cstructfac(int na, int nr, int nc, float f2, int NA,
13 | __global TYPE_A* a,int NH, __global TYPE_H* h,int NE,__global TYPE_E* E)
14 | {
15 | __local TYPE_A ashared[(16384-2048)/(sizeof(TYPE_A))];
16 | int a_start;
17 |
18 | int i = get_global_id(0);
19 | if (i < nr) E[i].x = E[i].y = 0.0f;
20 |
21 | for (a_start = 0; a_start < na; a_start += nc) {
22 | int a_end = min(a_start + nc, na);
23 | int k = get_local_id(0);
24 | while (k < a_end - a_start) {
25 | ashared[k] = a[k + a_start];
26 | k += get_local_size(0);
27 | }
28 |
29 | barrier(CLK_LOCAL_MEM_FENCE);
30 |
31 | if (i < nr) {
32 | int j;
33 | float A,B;
34 | const float twopi = 6.28318584f;
35 |
36 | TYPE_H hi = h[i];
37 | A = 0.0f;
38 | B = 0.0f;
39 |
40 | int jmax = a_end - a_start;
41 | for (j=0; j < jmax; j++) {
42 | float A1,B1;
43 | float4 aj = ashared[j];
44 | float arg = twopi*(hi.x*aj.y +
45 | hi.y*aj.z +
46 | hi.z*aj.w);
47 | B1=sincos(arg, &A1);
48 | A += aj.x*A1;
49 | B += aj.x*B1;
50 | }
51 | E[i].x += A*f2;
52 | E[i].y += B*f2;
53 | }
54 | barrier(CLK_LOCAL_MEM_FENCE);
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/03-gpu-devices/krist-opencl/krist.h:
--------------------------------------------------------------------------------
1 |
2 | #define DIM2_H 4
3 | #define DIM2_A 4
4 | #define DIM2_E 2
5 |
6 | #if DIM2_H == 4
7 | #define TYPE_H float4
8 | #endif
9 | #if DIM2_H == 3
10 | #define TYPE_H float3
11 | #endif
12 | #if DIM2_A == 4
13 | #define TYPE_A float4
14 | #endif
15 | #if DIM2_A == 3
16 | #define TYPE_A float3
17 | #endif
18 | #if DIM2_E == 4
19 | #define TYPE_E float4
20 | #endif
21 | #if DIM2_E == 3
22 | #define TYPE_E float3
23 | #endif
24 | #if DIM2_E == 2
25 | #define TYPE_E float2
26 | #endif
27 |
28 | #ifndef __OPENCL_VERSION__
29 | #pragma omp target device(opencl) copy_deps ndrange(/*???*/)
30 | #pragma omp task /* in and outs? */
31 | __kernel void cstructfac(int na, int nr, int nc, float f2,
32 | int NA, __global float* a, int NH , __global float* h, int NE, __global float* E);
33 | #endif
34 |
35 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-cuda/.config/kernel.h:
--------------------------------------------------------------------------------
1 |
2 | // Thread block size
3 | #define BLOCK_SIZE 16
4 |
5 |
6 | #ifdef DP
7 | #define REAL double
8 | #else
9 | #define REAL float
10 | #endif
11 |
12 | #ifdef __cplusplus
13 | extern "C"
14 | {
15 | #endif
16 |
17 | //Kernel declaration as a task should be here
18 | //Remember, we want to multiply two matrices, (A*B=C) where all of them have size NB*NB
19 | #pragma omp target device(cuda) ndrange(2,NB,NB,16,16) copy_deps
20 | #pragma omp task inout([NB*NB]C) in([NB*NB]A,[NB*NB]B)
21 | __global__ void Muld(REAL* A, REAL* B, int wA, int wB, REAL* C,int NB);
22 |
23 | #ifdef __cplusplus
24 | }
25 | #endif
26 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-cuda/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=matmul-p
2 |
3 | export IFS=";"
4 |
5 | export NX_GPUMAXMEM=90
6 |
7 | GPUS="01;02"
8 | SIZES="8192"
9 |
10 | for size in $SIZES; do
11 | # Creating the input file
12 | touch test.in
13 | echo "$size $size $size 3" > test.in
14 | for gpu in $GPUS; do
15 | # Executing the application
16 | NX_GPUS=$gpu NX_SMP_WORKERS=1 ./$PROGRAM
17 | done
18 | done
19 |
20 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-cuda/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=matmul-p
2 |
3 | export NX_SMP_WORKERS=1
4 | export NX_GPUS=2 #change this in order to use more GPUs
5 |
6 | export NX_GPUMAXMEM=90
7 |
8 | # Creating the input file
9 | touch test.in
10 | echo "8192 8192 8192 3" > test.in
11 |
12 | # Executing the application
13 | ./$PROGRAM
14 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-cuda/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=matmul
2 | KERNEL=kernel
3 | PREFIX=.
4 |
5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
6 |
7 | JOB_SCHED_VERSION=-smp
8 |
9 | BASE_DIR=../..
10 | include $(BASE_DIR)/common-files/Makefile
11 |
12 | CC = mcc
13 | KC = nvcc
14 |
15 | CFLAGS = --ompss --cuda
16 | CFLAGS_P =
17 | CFLAGS_I = --instrument
18 | CFLAGS_D = --debug
19 |
20 | NVCFLAGS = -O3
21 |
22 | LIBS =
23 | INCS = -I$(PREFIX)
24 |
25 | PRECISION=-DDP
26 | EXTRA = -O3 -Wall -Wno-unused
27 | OBJECTS= check.o gendat.o prtspeed.o cclock.o
28 |
29 | $(PROGRAM)-p: $(PROGRAM)-p.o $(KERNEL).o $(OBJECTS)
30 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS)
31 |
32 | $(PROGRAM)-i: $(PROGRAM)-i.o $(KERNEL).o $(OBJECTS)
33 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS)
34 |
35 | $(PROGRAM)-d: $(PROGRAM)-d.o $(KERNEL).o $(OBJECTS)
36 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS)
37 |
38 |
39 | $(PROGRAM)-p.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h
40 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(PRECISION) $(INCS) -o $@ -c $<
41 |
42 | $(PROGRAM)-i.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h
43 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(PRECISION) $(INCS) -o $@ -c $<
44 |
45 | $(PROGRAM)-d.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h
46 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(PRECISION) $(INCS) -o $@ -c $<
47 |
48 |
49 | $(KERNEL).o: $(KERNEL).cu $(PREFIX)/$(KERNEL).h
50 | $(KC) $(INCS) $(PRECISION) -o $@ -c $<
51 |
52 | .c.o:
53 | $(CC) --no-openmp $(EXTRA) $(PRECISION) -c $<
54 |
55 | clean:
56 | rm -f $(CC)_* *.o *~ $(TARGETS)
57 | rm -f test.in
58 |
59 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-cuda/cclock.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | /* -------------------------------------------------------------------
6 |
7 | This function returns the wall clock time with micro seconds
8 | accuracy.
9 | The data type of the returned value is "double".
10 |
11 | The function can be called from a FORTRAN module. The value
12 | returned by cclock_ and cclock should be of type REAL(Kind = 8).
13 |
14 | -------------------------------------------------------------------
15 | */
16 |
17 | double cclock_( void )
18 | {
19 | const double micro = 1.0e-06; /* Conversion constant */
20 | static long start = 0L, startu;
21 | struct timeval tp; /* Structure used by gettimeofday */
22 | double wall_time; /* To hold the result */
23 |
24 |
25 | if ( gettimeofday( &tp, NULL) == -1 )
26 | wall_time = -1.0e0;
27 | else if( !start ) {
28 | start = tp.tv_sec;
29 | startu = tp.tv_usec;
30 | wall_time = 0.0e0;
31 | }
32 | else
33 | wall_time = (double) (tp.tv_sec - start) + micro*(tp.tv_usec - startu);
34 |
35 | return wall_time;
36 | }
37 |
38 |
39 | double cclock( void )
40 | {
41 | return cclock_();
42 | }
43 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-cuda/check.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "driver.h"
6 |
7 | #ifdef DP
8 | #define REAL double
9 | #else
10 | #define REAL float
11 | #endif
12 |
13 | //#define BSIZE 1024
14 |
15 | int check( int nrep, int m, int l, int n, int mDIM, int nDIM, REAL **c/*[][nDIM*BSIZE] */)
16 | {
17 | double eps, tvalue = (double)l;
18 | int i, j, k, o, ok = 0;
19 |
20 | eps = 2.0*l*l*DBL_EPSILON;
21 | int perfectM = m / BSIZE;
22 | int perfectN = n / BSIZE;
23 |
24 | int leftOutM = m % BSIZE;
25 | int leftOutN = n % BSIZE;
26 |
27 | for(i=0;i perfectM && k >= leftOutM )
32 | break;
33 | else if( j == nDIM-1 && nDIM > perfectN && o >= leftOutN )
34 | break;
35 | else {
36 | if ( fabs( tvalue - (c[i*nDIM+j][k*BSIZE+o]/nrep) ) > eps ) {
37 | ok++;
38 | //printf("Bad result at [%d][%d] : expected %f but found %f\n", i*nDIM+j, k*BSIZE+o, tvalue, c[i*nDIM+j][k*BSIZE+o]);
39 | }
40 | }
41 | }
42 | }
43 | }
44 | }
45 |
46 | return( ok );
47 | }
48 |
49 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-cuda/driver.h:
--------------------------------------------------------------------------------
1 | //#define BSIZE 512
2 | #define BSIZE 1024
3 | //#define BSIZE 2048
4 | //#define BSIZE 4096
5 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-cuda/kernel.h:
--------------------------------------------------------------------------------
1 |
2 | // Thread block size
3 | #define BLOCK_SIZE 16
4 |
5 | #ifdef DP
6 | #define REAL double
7 | #else
8 | #define REAL float
9 | #endif
10 |
11 | #ifdef __cplusplus
12 | extern "C"
13 | {
14 | #endif
15 |
16 | //Kernel declaration as a task should be here
17 | //Remember, we want to multiply two matrices, (A*B=C) where all of them have size NB*NB
18 |
19 | #ifdef __cplusplus
20 | }
21 | #endif
22 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-cuda/prtspeed.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define max(a,b)( ((a) > (b)) ? (a) : (b) )
4 | #if 0
5 | void prtspeed( int m, int l, int n, double time, int ok, unsigned long nops )
6 | {
7 | double speed;
8 | // -----------------------------------------------------------------
9 | //speed = 1.0e-9*2*m*l*n/max( time, 1.0e-9 );
10 | speed = 1.0e-9*nops/max( time, 1.0e-9 );
11 |
12 | printf( "%4d |%4d | %4d| %11.4lf | %11.4lf | ", m, l, n, time, speed );
13 | if ( ok == 0 )
14 | printf( " T |\n" );
15 | else
16 | printf( " F (%d)|\n", ok );
17 | }
18 | #else
19 |
20 | void prtspeed( int m, int l, int n, int nb, double time, int ok, unsigned long nops )
21 | {
22 | double speed = 1.0e-9*nops/time;
23 | printf("Matrix size: %dx%d\n", m, n);
24 | printf("Block size: %dx%d\n", nb, nb);
25 | #ifdef DP
26 | printf("Precision type: Double\n");
27 | #else
28 | printf("Precision type: Simple\n");
29 | #endif
30 |
31 | printf(" GFLOPS : %.4lf\n", speed);
32 | printf(" computation time (in seconds): %.4lf\n", time);
33 | if ( ok == 0 ) {
34 | printf(" Verification: Ok\n");
35 | } else {
36 | printf(" Verification: Failed (%d)\n", ok);
37 | }
38 | }
39 | #endif
40 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-opencl/.config/kernel.h:
--------------------------------------------------------------------------------
1 |
2 | // Thread block size
3 | #define BLOCK_SIZE 16
4 | //Mercurium pragmas can't "read" values from #defines, so we "save" the value as integer
5 | __constant int BL_SIZE= BLOCK_SIZE;
6 |
7 |
8 | #ifdef DP
9 | #define REAL double
10 | #else
11 | #define REAL float
12 | #endif
13 |
14 | #ifdef __cplusplus
15 | extern "C"
16 | {
17 | #endif
18 |
19 | //Kernel declaration as a task should be here
20 | //Remember, we want to multiply two matrices, (A*B=C) where all of them have size NB*NB
21 | #pragma omp target device(opencl) ndrange(2,NB,NB,BL_SIZE,BL_SIZE) copy_deps
22 | #pragma omp task inout([NB*NB]C) in([NB*NB]A,[NB*NB]B)
23 | __kernel void Muld(__global REAL* A,__global REAL* B, int wA, int wB,__global REAL* C,int NB);
24 |
25 | #ifdef __cplusplus
26 | }
27 | #endif
28 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-opencl/.config/multirun.sh:
--------------------------------------------------------------------------------
PROGRAM=matmul-p

export NX_SMP_WORKERS=1
export NX_OPENCL_MAX_DEVICES=2 #max number of opencl devices (GPUs in this case) to use
export NX_OPENCL_DEVICE_TYPE=GPU

# Creating input file (the redirection creates/truncates it; no touch needed)
echo "4096 4096 4096 3" > test.in

# Executing the program
./$PROGRAM

--------------------------------------------------------------------------------
/03-gpu-devices/matmul-opencl/.config/run-once.sh:
--------------------------------------------------------------------------------
PROGRAM=matmul-p

export NX_SMP_WORKERS=1
export NX_OPENCL_MAX_DEVICES=2 #max number of opencl devices (GPUs in this case) to use
export NX_OPENCL_DEVICE_TYPE=GPU

# Creating input file (the redirection creates/truncates it; no touch needed)
echo "4096 4096 4096 3" > test.in

# Executing the program
./$PROGRAM

--------------------------------------------------------------------------------
/03-gpu-devices/matmul-opencl/Makefile:
--------------------------------------------------------------------------------
# Build the OmpSs + OpenCL matmul binaries:
#   matmul-p (performance), matmul-i (instrumented), matmul-d (debug).
PROGRAM=matmul
KERNEL=kernel
PREFIX=.

TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d

JOB_SCHED_VERSION=-smp

# Shared variables/rules (GFLAGS, job scripts, ...) come from the course tree.
BASE_DIR=../..
include $(BASE_DIR)/common-files/Makefile

# Mercurium source-to-source compiler driver.
CC = mcc

CFLAGS = --ompss --opencl
CFLAGS_P =
CFLAGS_I = --instrument
CFLAGS_D = --debug

LIBS =
# The OpenCL kernel is built at runtime; forward the include path to that build too.
INCS = -I$(PREFIX) --opencl-build-opts=-I$(PREFIX)

EXTRA = -O3 -Wall -Wno-unused
OBJECTS = cclock.o driver.o prtspeed.o check.o gendat.o

$(PROGRAM)-p: $(PROGRAM).c $(KERNEL).cl $(OBJECTS)
	$(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl $(OBJECTS)

$(PROGRAM)-i: $(PROGRAM).c $(KERNEL).cl $(OBJECTS)
	$(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl $(OBJECTS)

$(PROGRAM)-d: $(PROGRAM).c $(KERNEL).cl $(OBJECTS)
	$(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl $(OBJECTS)

# Helper objects are plain C: compile them without the OpenMP/OmpSs transform.
.c.o:
	$(CC) --no-openmp $(EXTRA) -c $<

clean:
	rm -f $(CC)_* *.o *~ $(TARGETS)

--------------------------------------------------------------------------------
/03-gpu-devices/matmul-opencl/README.rst:
--------------------------------------------------------------------------------
1 | ../matmul-cuda/README.rst
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-opencl/cclock.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | /* -------------------------------------------------------------------
6 |
7 | This function returns the wall clock time with micro seconds
8 | accuracy.
9 | The data type of the returned value is "double".
10 |
11 | The function can be called from a FORTRAN module. The value
12 | returned by cclock_ and cclock should be of type REAL(Kind = 8).
13 |
14 | -------------------------------------------------------------------
15 | */
16 |
/* Wall-clock timer with microsecond resolution.  The first successful call
   records the epoch and returns 0.0; later calls return the elapsed seconds
   since that first call.  Returns -1.0 if gettimeofday() fails. */
double cclock_( void )
{
    static long start_sec = 0L, start_usec;  /* epoch captured on first call */
    struct timeval now;

    if ( gettimeofday( &now, NULL ) == -1 )
        return -1.0e0;

    if ( !start_sec ) {
        /* First call: remember the reference point. */
        start_sec  = now.tv_sec;
        start_usec = now.tv_usec;
        return 0.0e0;
    }

    return (double) ( now.tv_sec - start_sec )
         + 1.0e-06 * ( now.tv_usec - start_usec );
}
37 |
38 |
/* Wrapper so callers can use the timer without the Fortran-style
   trailing underscore. */
double cclock( void )
{
    return cclock_();
}
43 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-opencl/check.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #include "driver.h"
6 |
7 | #ifdef DP
8 | #define REAL double
9 | #else
10 | #define REAL float
11 | #endif
12 |
13 | //#define BSIZE 1024
14 |
// Verify the tiled result matrix c: after nrep accumulating multiplies every
// element should equal l (tvalue), within a tolerance scaled by l^2.
// Returns the number of mismatching elements (0 == success).
// NOTE(review): the loop nest below lost text during extraction (everything
// after each '<' is missing), so it does not compile as-is — restore it from
// the repository before use.
int check( int nrep, int m, int l, int n, int mDIM, int nDIM, REAL **c/*[][nDIM*BSIZE] */)
{
   double eps, tvalue = (double)l;
   int i, j, k, o, ok = 0;

   // Tolerance: 2*l*l*DBL_EPSILON accounts for l accumulated products.
   eps = 2.0*l*l*DBL_EPSILON;
   int perfectM = m / BSIZE;
   int perfectN = n / BSIZE;

   // Remainders for matrices whose size is not a multiple of BSIZE.
   int leftOutM = m % BSIZE;
   int leftOutN = n % BSIZE;

   for(i=0;i perfectM && k >= leftOutM )
            break;
         else if( j == nDIM-1 && nDIM > perfectN && o >= leftOutN )
            break;
         else {
            if ( fabs( tvalue - (c[i*nDIM+j][k*BSIZE+o]/nrep) ) > eps ) {
              ok++;
              //printf("Bad result at [%d][%d] : expected %f but found %f\n", i*nDIM+j, k*BSIZE+o, tvalue, c[i*nDIM+j][k*BSIZE+o]);
            }
         }
        }
      }
    }
   }

   return( ok );
}
48 |
49 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-opencl/driver.h:
--------------------------------------------------------------------------------
//#define BSIZE 512
// Tile (block) dimension of the matrix decomposition: every tile is
// BSIZE x BSIZE elements. Alternative sizes are kept commented out.
#define BSIZE 1024
//#define BSIZE 2048
//#define BSIZE 4096
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-opencl/kernel.h:
--------------------------------------------------------------------------------

// Thread block size
#define BLOCK_SIZE 16
//Mercurium pragmas can't "read" values from #defines, so we "save" the value as integer
__constant int BL_SIZE= BLOCK_SIZE;

// Working precision: compile with -DDP for double precision.
#ifdef DP
#define REAL double
#else
#define REAL float
#endif

#ifdef __cplusplus
extern "C"
{
#endif

//Kernel declaration as a task should be here
//Remember, we want to multiply two matrices, (A*B=C) where all of them have size NB*NB
// (see .config/kernel.h for the reference task/ndrange declaration)

#ifdef __cplusplus
}
#endif

--------------------------------------------------------------------------------
/03-gpu-devices/matmul-opencl/matmul.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 |
6 | #include "driver.h"
7 |
8 | #ifdef DP
9 | #define REAL double
10 | #else
11 | #define REAL float
12 | #endif
13 |
14 |
15 | const int NB = BSIZE;
16 |
17 |
18 | void matmul( int m, int l, int n, int mDIM, int lDIM, int nDIM, REAL **tileA, REAL **tileB,
19 | REAL **tileC )
20 | {
21 | int i, j, k;
22 | for(i = 0;i < mDIM; i++){
23 | for (j = 0; j < nDIM; j++){
24 | for (k = 0; k < lDIM; k++){
25 | //Kernel call
26 | Muld(tileA[i*lDIM+k], tileB[k*nDIM+j],NB,NB, tileC[i*nDIM+j],NB);
27 | }
28 | }
29 | }
30 | #pragma omp taskwait
31 | }
32 |
33 |
34 |
--------------------------------------------------------------------------------
/03-gpu-devices/matmul-opencl/prtspeed.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define max(a,b)( ((a) > (b)) ? (a) : (b) )
4 | #if 0
5 | void prtspeed( int m, int l, int n, double time, int ok, unsigned long nops )
6 | {
7 | double speed;
8 | // -----------------------------------------------------------------
9 | //speed = 1.0e-9*2*m*l*n/max( time, 1.0e-9 );
10 | speed = 1.0e-9*nops/max( time, 1.0e-9 );
11 |
12 | printf( "%4d |%4d | %4d| %11.4lf | %11.4lf | ", m, l, n, time, speed );
13 | if ( ok == 0 )
14 | printf( " T |\n" );
15 | else
16 | printf( " F (%d)|\n", ok );
17 | }
18 | #else
19 |
/*
 * Report matrix/block configuration, achieved GFLOPS, elapsed time and the
 * verification verdict on stdout.
 *
 *   m, n  - matrix dimensions (l is accepted for interface compatibility
 *           but not printed in this report format)
 *   nb    - block (tile) size
 *   time  - computation time in seconds
 *   ok    - 0 when verification succeeded, otherwise the error count
 *   nops  - number of floating point operations performed
 */
void prtspeed( int m, int l, int n, int nb, double time, int ok, unsigned long nops )
{
   /* Clamp the divisor so a zero/near-zero timing cannot produce inf/NaN
      GFLOPS (same guard the legacy report format above uses). */
   double secs  = ( time > 1.0e-9 ) ? time : 1.0e-9;
   double speed = 1.0e-9*nops/secs;

   printf("Matrix size: %dx%d\n", m, n);
   printf("Block size: %dx%d\n", nb, nb);
#ifdef DP
   printf("Precision type: Double\n");
#else
   printf("Precision type: Simple\n");
#endif

   printf(" GFLOPS : %.4lf\n", speed);
   printf(" computation time (in seconds): %.4lf\n", time);
   if ( ok == 0 ) {
      printf(" Verification: Ok\n");
   } else {
      printf(" Verification: Failed (%d)\n", ok);
   }
}
39 | #endif
40 |
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-cuda/.config/kernel.h:
--------------------------------------------------------------------------------
#include"nbody.h"

#ifdef __cplusplus
extern "C" {
#endif

// OmpSs task declaration of the CUDA force kernel: a 1-D ndrange of `size`
// threads in groups of MAX_NUM_THREADS; copy_deps moves the particle arrays
// to/from the device according to the in/out clauses below.
#pragma omp target device(cuda) ndrange(1,size,MAX_NUM_THREADS) copy_deps
#pragma omp task in(d_particles[0;number_of_particles]) out([size] output)
__global__ void calculate_force_func(int size, float time_interval, int number_of_particles,
                                     Particle* d_particles, Particle *output, int first_local,
                                     int last_local);
#ifdef __cplusplus
}
#endif

--------------------------------------------------------------------------------
/03-gpu-devices/nbody-cuda/.config/multirun.sh:
--------------------------------------------------------------------------------
PROGRAM=nbody-p

# Single SMP worker so the GPUs do the compute work.
export NX_SMP_WORKERS=1

# Scaling run: execute once with 1 GPU and once with 2 GPUs.
for gpus in 1 2; do
    export NX_GPUS=$gpus
    NX_ARGS="--cache-policy writethrough --gpu-max-memory 90" ./$PROGRAM nbody_input-16384.in
done

--------------------------------------------------------------------------------
/03-gpu-devices/nbody-cuda/.config/nbody.h:
--------------------------------------------------------------------------------
#ifndef nbody_h
#define nbody_h

#define gravitational_constant 6.6726e-11 /* N(m/kg)2 */

/* One simulated body. The trailing `pad` field brings the struct to 8 floats
   (32 bytes); presumably for alignment of device arrays — confirm against the
   kernels. */
typedef struct {
    float position_x; /* m */
    float position_y; /* m */
    float position_z; /* m */
    float velocity_x; /* m/s */
    float velocity_y; /* m/s */
    float velocity_z; /* m/s */
    float mass;       /* kg */
    float pad;
} Particle;

#endif /* #ifndef nbody_h */

19 |
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-cuda/.config/run-once.sh:
--------------------------------------------------------------------------------
PROGRAM=nbody-p

# Single SMP worker so the GPUs do the compute work.
export NX_SMP_WORKERS=1
export NX_GPUS=2 #change this in order to use more GPUs

NX_ARGS="--cache-policy writethrough --gpu-max-memory 90" ./$PROGRAM nbody_input-16384.in

8 |
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-cuda/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=nbody
2 | KERNEL=kernel
3 | PREFIX=.
4 |
5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
6 |
7 | JOB_SCHED_VERSION=-smp
8 |
9 | BASE_DIR=../..
10 | include $(BASE_DIR)/common-files/Makefile
11 |
12 | CC = mcc
13 | KC = nvcc
14 |
15 | CFLAGS = --ompss --cuda
16 | CFLAGS_P =
17 | CFLAGS_I = --instrument
18 | CFLAGS_D = --debug
19 |
20 | NVCFLAGS = -O3
21 |
22 | LIBS =
23 | INCS = -I$(PREFIX)
24 |
25 | EXTRA = -O3 -Wall -Wno-unused
26 | OBJECTS=
27 |
28 | $(PROGRAM)-p: $(PROGRAM)-p.o $(KERNEL).o $(OBJECTS)
29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS)
30 |
31 | $(PROGRAM)-i: $(PROGRAM)-i.o $(KERNEL).o $(OBJECTS)
32 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS)
33 |
34 | $(PROGRAM)-d: $(PROGRAM)-d.o $(KERNEL).o $(OBJECTS)
35 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS)
36 |
37 |
38 | $(PROGRAM)-p.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h
39 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ -c $<
40 |
41 | $(PROGRAM)-i.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h
42 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ -c $<
43 |
44 | $(PROGRAM)-d.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h
45 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ -c $<
46 |
47 |
48 | $(KERNEL).o: $(KERNEL).cu $(PREFIX)/$(KERNEL).h
49 | $(KC) $(INCS) -o $@ -c $<
50 |
51 | .c.o:
52 | $(CC) --no-openmp $(EXTRA) -c $<
53 |
54 | clean:
55 | rm -f $(CC)_* *.o *~ $(TARGETS)
56 |
57 |
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-cuda/kernel.h:
--------------------------------------------------------------------------------
#include"nbody.h"

#ifdef __cplusplus
extern "C" {
#endif

// TODO(exercise): declare the CUDA force kernel here as an OmpSs task
// (the reference declaration lives in .config/kernel.h).

#ifdef __cplusplus
}
#endif

--------------------------------------------------------------------------------
/03-gpu-devices/nbody-cuda/nbody.h:
--------------------------------------------------------------------------------
#ifndef nbody_h
#define nbody_h

#define gravitational_constant 6.6726e-11 /* N(m/kg)2 */

/* One simulated body. The trailing `pad` field brings the struct to 8 floats
   (32 bytes); presumably for alignment of device arrays — confirm against the
   kernels. */
typedef struct {
    float position_x; /* m */
    float position_y; /* m */
    float position_z; /* m */
    float velocity_x; /* m/s */
    float velocity_y; /* m/s */
    float velocity_z; /* m/s */
    float mass;       /* kg */
    float pad;
} Particle;

#endif /* #ifndef nbody_h */

19 |
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-cuda/nbody_input-16384.in:
--------------------------------------------------------------------------------
1 | 16384
2 | 1.0e+10
3 | 1.0e+10
4 | 1.0e+10
5 | 1.0e+0
6 | 10
7 | 11
8 | 12345
9 | 1.0e+28
10 | nbody
11 |
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-opencl/.config/kernel.h:
--------------------------------------------------------------------------------
#include "../nbody.h"

#ifdef __cplusplus
extern "C" {
#endif

// OmpSs task declaration of the OpenCL force kernel: a 1-D ndrange of `size`
// work-items in groups of MAX_NUM_THREADS; copy_deps moves the particle
// arrays to/from the device according to the in/out clauses below.
#pragma omp target device(opencl) ndrange(1,size,MAX_NUM_THREADS) copy_deps
#pragma omp task in(d_particles[0;number_of_particles]) out([size] out)
__kernel void calculate_force_func(int size, float time_interval, int number_of_particles,
                                   __global Particle* d_particles,__global Particle *out,
                                   int first_local, int last_local);
#ifdef __cplusplus
}
#endif

--------------------------------------------------------------------------------
/03-gpu-devices/nbody-opencl/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=nbody-p
2 |
3 | export NX_SMP_WORKERS=1
4 | export NX_OPENCL_MAX_DEVICES=2 #max number of opencl devices (GPUs in this case) to use
5 |
6 | NX_ARGS="--cache-policy writethrough --gpu-max-memory 1000000000" ./$PROGRAM nbody_input-16384.in
7 |
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-opencl/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=nbody-p
2 |
3 | export NX_SMP_WORKERS=1
4 | export NX_OPENCL_MAX_DEVICES=2 #max number of opencl devices (GPUs in this case) to use
5 |
6 | NX_ARGS="--cache-policy writethrough --gpu-max-memory 1000000000" ./$PROGRAM nbody_input-16384.in
7 |
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-opencl/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=nbody
2 | KERNEL=kernel
3 | PREFIX=.
4 |
5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
6 |
7 | JOB_SCHED_VERSION=-smp
8 |
9 | BASE_DIR=../..
10 | include $(BASE_DIR)/common-files/Makefile
11 |
12 | CC = mcc
13 |
14 | CFLAGS = --ompss -k --opencl
15 | CFLAGS_P =
16 | CFLAGS_I = --instrument
17 | CFLAGS_D = --debug
18 |
19 | LIBS =
20 | INCS = -I$(PREFIX) --opencl-build-opts=-I$(PREFIX)
21 |
22 | EXTRA = -O3 -Wall -Wno-unused
23 | OBJECTS =
24 |
25 | $(PROGRAM)-p: $(PROGRAM).c $(KERNEL).cl
26 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl
27 |
28 | $(PROGRAM)-i: $(PROGRAM).c $(KERNEL).cl
29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl
30 |
31 | $(PROGRAM)-d: $(PROGRAM).c $(KERNEL).cl
32 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $< $(LIBS) $(KERNEL).cl
33 |
34 | .c.o:
35 | $(CC) --no-openmp $(EXTRA) -c $<
36 |
37 | clean:
38 | rm -f $(CC)_* *.o *~ $(TARGETS)
39 | rm -f nbody_out.xyz
40 |
41 |
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-opencl/README.rst:
--------------------------------------------------------------------------------
1 | ../nbody-cuda/README.rst
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-opencl/kernel.c:
--------------------------------------------------------------------------------
#include
#include
// NOTE(review): both #include targets above were lost in extraction —
// restore them from the repository before compiling.

// Work-group size used for the OpenCL ndrange.
const int MAX_NUM_THREADS= 128;

// Drive the force-calculation kernel over the particle array in chunks of
// bs particles. With bs == number_of_particles this is a single launch.
void Particle_array_calculate_forces_opencl(Particle* this_particle_array, Particle *output_array, int number_of_particles, float time_interval ) {
    const int bs = number_of_particles;
    int i;

    for ( i = 0; i < number_of_particles; i += bs )
    {
        //Calling the kernel
        // NOTE(review): '....' is the exercise placeholder — students replace
        // it with the calculate_force_func(...) task declared in kernel.h.
        ....(bs,time_interval,number_of_particles,this_particle_array, &output_array[i], i, i+bs-1);
    }
}
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-opencl/kernel.h:
--------------------------------------------------------------------------------
#include "nbody.h"

#ifdef __cplusplus
extern "C" {
#endif

// TODO(exercise): declare the OpenCL force kernel here as an OmpSs task
// (the reference declaration lives in .config/kernel.h).

#ifdef __cplusplus
}
#endif

11 |
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-opencl/nbody.h:
--------------------------------------------------------------------------------
/* nbody.h */

#ifndef nbody_h
#define nbody_h

#define gravitational_constant 6.6726e-11 /* N(m/kg)2 */

/* One simulated body. The trailing `pad` field brings the struct to 8 floats
   (32 bytes); presumably for alignment of device arrays — confirm against the
   kernels. */
typedef struct {
    float position_x; /* m */
    float position_y; /* m */
    float position_z; /* m */
    float velocity_x; /* m/s */
    float velocity_y; /* m/s */
    float velocity_z; /* m/s */
    float mass;       /* kg */
    float pad;
} Particle;

/* Work-group size; declared __constant so both the OpenCL kernel source and
   the Mercurium ndrange clause can read the value. */
__constant int MAX_NUM_THREADS= 128;

#endif /* #ifndef nbody_h */

23 |
--------------------------------------------------------------------------------
/03-gpu-devices/nbody-opencl/nbody_input-16384.in:
--------------------------------------------------------------------------------
1 | 16384
2 | 1.0e+10
3 | 1.0e+10
4 | 1.0e+10
5 | 1.0e+0
6 | 10
7 | 11
8 | 12345
9 | 1.0e+28
10 | nbody
11 |
--------------------------------------------------------------------------------
/03-gpu-devices/saxpy-cuda/.config/kernel.h:
--------------------------------------------------------------------------------

#ifdef __cplusplus
extern "C"
{
#endif

// OmpSs task declaration of the CUDA saxpy kernel: a 1-D ndrange with n
// total threads in blocks of 128; copy_deps transfers x (in) and y (inout).
#pragma omp target device(cuda) copy_deps ndrange( 1,n,128 )
#pragma omp task in([n]x) inout([n]y)
__global__ void saxpy(int n, float a,float* x, float* y);

#ifdef __cplusplus
}
#endif

--------------------------------------------------------------------------------
/03-gpu-devices/saxpy-cuda/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=saxpy-p
2 |
3 | export NX_GPUMAXMEM=90
4 |
5 | for gpus in 1 2; do
6 | export NX_GPUS=$gpus
7 | ./$PROGRAM
8 | done
9 |
10 |
--------------------------------------------------------------------------------
/03-gpu-devices/saxpy-cuda/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=saxpy-p
2 |
3 | export NX_GPUMAXMEM=90
4 | export NX_GPUS=2 #change this in order to use more GPUs
5 |
6 | ./$PROGRAM
7 |
8 |
--------------------------------------------------------------------------------
/03-gpu-devices/saxpy-cuda/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=saxpy
2 | KERNEL=kernel
3 | PREFIX=.
4 |
5 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
6 |
7 | JOB_SCHED_VERSION=-smp
8 |
9 | BASE_DIR=../..
10 | include $(BASE_DIR)/common-files/Makefile
11 |
12 | CC = mcc
13 | KC = nvcc
14 |
15 | CFLAGS = --ompss --cuda
16 | CFLAGS_P =
17 | CFLAGS_I = --instrument
18 | CFLAGS_D = --debug
19 |
20 | NVCFLAGS = -O3
21 |
22 | LIBS =
23 | INCS = -I$(PREFIX)
24 |
25 | EXTRA = -O3 -Wall -Wno-unused
26 | OBJECTS=
27 |
28 | $(PROGRAM)-p: $(PROGRAM)-p.o $(KERNEL).o $(OBJECTS)
29 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS)
30 |
31 | $(PROGRAM)-i: $(PROGRAM)-i.o $(KERNEL).o $(OBJECTS)
32 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS)
33 |
34 | $(PROGRAM)-d: $(PROGRAM)-d.o $(KERNEL).o $(OBJECTS)
35 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) -o $@ $< $(LIBS) $(KERNEL).o $(OBJECTS)
36 |
37 |
38 | $(PROGRAM)-p.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h
39 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ -c $<
40 |
41 | $(PROGRAM)-i.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h
42 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ -c $<
43 |
44 | $(PROGRAM)-d.o: $(PROGRAM).c $(PREFIX)/$(KERNEL).h
45 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ -c $<
46 |
47 |
48 | $(KERNEL).o: $(KERNEL).cu $(PREFIX)/$(KERNEL).h
49 | $(KC) $(INCS) -o $@ -c $<
50 |
51 | .c.o:
52 | $(CC) --no-openmp $(EXTRA) -c $<
53 |
54 | clean:
55 | rm -f $(CC)_* *.o *~ $(TARGETS)
56 |
57 |
--------------------------------------------------------------------------------
/03-gpu-devices/saxpy-cuda/kernel.cu:
--------------------------------------------------------------------------------
1 | #include
2 |
/* Single-precision a*x + y: one CUDA thread per vector element. */
__global__ void saxpy(int n, float a, float* x, float* y)
{
    /* Global index of this thread within the launch. */
    const int idx = blockDim.x * blockIdx.x + threadIdx.x;

    /* Guard against the final, partially filled block. */
    if (idx >= n)
        return;

    y[idx] = a * x[idx] + y[idx];
}
8 |
--------------------------------------------------------------------------------
/03-gpu-devices/saxpy-cuda/kernel.h:
--------------------------------------------------------------------------------
#ifdef __cplusplus
extern "C"
{
#endif

// OmpSs task declaration of the CUDA saxpy kernel.
// TODO(exercise): fill in the ndrange clause — e.g. ndrange(1,n,128) as in
// .config/kernel.h — so the runtime launches n threads in blocks of 128.
#pragma omp target device(cuda) copy_deps ndrange(/*???*/)
#pragma omp task in([n]x) inout([n]y)
__global__ void saxpy(int n, float a,float* x, float* y);

#ifdef __cplusplus
}
#endif

13 |
--------------------------------------------------------------------------------
/03-gpu-devices/saxpy-cuda/saxpy.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | #define N 1024*1024
6 | #define BS 64*1024
7 |
8 | int main(int argc, char* argv[])
9 | {
10 | float a=5, *x, *y;
11 | int i;
12 |
13 | x = (float *) malloc(N*sizeof(float));
14 | y = (float *) malloc(N*sizeof(float));
15 |
16 | for (i=0; i
2 |
3 | __kernel void saxpy(int n, float a,
4 | __global float* x, __global float* y) {
5 | int i = get_global_id(0);
6 | if(i < n)
7 | y[i] = a * x[i] + y[i];
8 | }
9 |
10 |
--------------------------------------------------------------------------------
/03-gpu-devices/saxpy-opencl/kernel.h:
--------------------------------------------------------------------------------
// OmpSs task declaration of the OpenCL saxpy kernel.
// TODO(exercise): fill in the ndrange clause, e.g. ndrange(1,n,128).
#pragma omp target device(opencl) copy_deps /* ndrange(???) */
#pragma omp task in([n]x) inout([n]y)
__kernel void saxpy(int n, float a,
                    __global float* x, __global float* y);

--------------------------------------------------------------------------------
/03-gpu-devices/saxpy-opencl/saxpy.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define N 1024
4 | int main(int argc, char* argv[]) {
5 | float a, x[N], y[N];
6 | a=5;
7 | int i;
8 | for (i=0; i> to your job script.
15 |
--------------------------------------------------------------------------------
/04-mpi+ompss/heat/heat.h:
--------------------------------------------------------------------------------
/*
 * heat.h
 *
 * Global definitions for the iterative solver
 */

#include
/* NOTE(review): the header name after #include was lost in extraction;
   the FILE* declarations below suggest <stdio.h> — restore before compiling. */

// configuration

/* One circular heat source: position (posx, posy), influence radius `range`
   and temperature `temp`. */
typedef struct
{
    float posx;
    float posy;
    float range;
    float temp;
}
heatsrc_t;

/* Runtime parameters and working buffers of the solver. */
typedef struct
{
    unsigned maxiter;       // maximum number of iterations
    unsigned resolution;    // spatial resolution
    int algorithm;          // 0=>Jacobi, 1=>Gauss
                            // NOTE(review): test.dat documents codes as
                            // 0=Jacobi 1=RedBlack 2=GaussSeidel — confirm mapping.

    unsigned visres;        // visualization resolution

    double *u, *uhelp;      // solution grid and helper buffer (see relax_jacobi)
    double *uvis;           // coarsened grid used for visualization output

    unsigned numsrcs;       // number of heat sources
    heatsrc_t *heatsrcs;    // array of numsrcs heat sources
}
algoparam_t;

// function declarations

// misc.c
int initialize( algoparam_t *param );
int finalize( algoparam_t *param );
void write_image( FILE * f, double *u,
                  unsigned sizex, unsigned sizey );
int coarsen(double *uold, unsigned oldx, unsigned oldy ,
            double *unew, unsigned newx, unsigned newy );
int read_input( FILE *infile, algoparam_t *param );
void print_params( algoparam_t *param );
double wtime();

// solvers in solver.c
// Each performs one relaxation sweep over a sizex x sizey grid and
// returns the residual of that sweep.
double relax_redblack( double *u,
                       unsigned sizex, unsigned sizey );

double relax_gauss( double *u,
                    unsigned sizex, unsigned sizey );

double relax_jacobi( double *u, double *utmp,
                     unsigned sizex, unsigned sizey );


--------------------------------------------------------------------------------
/04-mpi+ompss/heat/test.dat:
--------------------------------------------------------------------------------
1 | 10 # iterations (25000)
2 | 4096 # resolution
3 | 0 # Algorithm 0=Jacobi 1=RedBlack 2=GaussSeidel
4 | 2 # number of heat sources
5 | 0.0 0.0 1.0 2.5 # (x,y), size temperature
6 | 0.5 1.0 1.0 2.5 #
7 |
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/.config/mm-image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bsc-pm/ompss-ee/edc3f13c8137bae7ca691b15fbd18b70d0765311/04-mpi+ompss/matmul/.config/mm-image.png
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=matmul-p
2 |
3 | THREADS=(1 2 3 4 5 6)
4 |
5 | for thread in ${THREADS[@]}; do
6 | NX_SMP_WORKERS=$thread ${MPIRUN_COMMAND} ./$PROGRAM
7 | done
8 |
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=matmul-p
2 |
3 | # Run with 6 threads per MPI process in the same node
4 | export NX_SMP_WORKERS=6
5 |
6 | # Uncomment to instrument
7 | #export INST=./graph.sh
8 | #export INST=./trace.sh
9 |
10 | ${MPIRUN_COMMAND} $INST ./$PROGRAM
11 |
12 | # Generate the trace if needed
13 | if [[ "$INST" == *"trace"* ]]; then
14 | mpi2prv -f TRACE.mpits -o myTrace.prv
15 | fi
16 |
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM=matmul
2 | PREFIX=.
3 |
4 | TARGETS=$(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
5 |
6 | JOB_SCHED_VERSION=-mpi
7 | TRACE_VERSION=-mpi
8 |
9 | BASE_DIR=../..
10 | include $(BASE_DIR)/common-files/Makefile
11 |
12 | CC = smpcc
13 |
14 | CFLAGS = --ompss
15 | CFLAGS_P =
16 | CFLAGS_I = --instrument
17 | CFLAGS_D = --debug
18 |
19 | LIBS = -L$(ATLAS_LIB_DIR) -lcblas -latlas
20 | INCS = -I. -I$(ATLAS_INC_DIR)
21 |
22 |
23 | EXTRA = -std=c99 -O3 -Wall -Wno-unused
24 | SOURCES = matmul.c driver.c check.c prthead.c gendat.c prtspeed.c cclock.c layouts.c
25 |
26 | all: $(TARGETS)
27 |
28 | $(PROGRAM)-p: $(SOURCES)
29 | $(MPI_NATIVE_CC)=$(CC) \
30 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
31 |
32 | $(PROGRAM)-i: $(SOURCES)
33 | $(MPI_NATIVE_CC)=$(CC) \
34 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
35 |
36 | $(PROGRAM)-d: $(SOURCES)
37 | $(MPI_NATIVE_CC)=$(CC) \
38 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
39 |
40 | clean:
41 | rm -f $(CC)_* *.o *~ $(TARGETS)
42 |
43 |
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/bsize.h:
--------------------------------------------------------------------------------
// Element type and tile size shared by the MPI+OmpSs matmul sources.
#define REAL double
#define BSIZE 1024

4 |
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/cclock.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 |
5 | /* -------------------------------------------------------------------
6 |
7 | This function returns the wall clock time with micro seconds
8 | accuracy.
9 | The data type of the returned value is "double".
10 |
11 | The function can be called from a FORTRAN module. The value
12 | returned by cclock_ and cclock should be of type REAL(Kind = 8).
13 |
14 | -------------------------------------------------------------------
15 | */
16 |
/* Wall-clock timer with microsecond resolution.  The first successful call
   records the epoch and returns 0.0; later calls return the elapsed seconds
   since that first call.  Returns -1.0 if gettimeofday() fails. */
double cclock_( void )
{
    static long start_sec = 0L, start_usec;  /* epoch captured on first call */
    struct timeval now;

    if ( gettimeofday( &now, NULL ) == -1 )
        return -1.0e0;

    if ( !start_sec ) {
        /* First call: remember the reference point. */
        start_sec  = now.tv_sec;
        start_usec = now.tv_usec;
        return 0.0e0;
    }

    return (double) ( now.tv_sec - start_sec )
         + 1.0e-06 * ( now.tv_usec - start_usec );
}
37 |
38 |
/* Wrapper so callers can use the timer without the Fortran-style
   trailing underscore. */
double cclock( void )
{
    return cclock_();
}
43 |
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/check.c:
--------------------------------------------------------------------------------
1 | #include "bsize.h"
2 | #include "matmul.h"
3 | #include
4 | #include
5 | #include
6 |
//#pragma omp target device (smp) copy_deps // copy_in ([ts]pb)
//#pragma omp task input(pb[0:ts-1]) concurrent (*ok)
// Count the elements of the m x n block pb that differ from `value` by more
// than eps, and accumulate the count into *ok.
// NOTE(review): the loop nest below lost text during extraction (everything
// after '<' is missing), so it does not compile as-is — restore it from the
// repository before use.
void check_block (int m, int n, double (*pb)[n], double value, double eps, int *ok)
{
   int i, j;
   int lok=0;

   for(i=0;i eps ) {
        lok++;
      }
    }
   }

   if (lok >0) *ok+=lok; //does not matter if no mx
}
24 |
/* Verify that every element of the local m x n block C equals tvalue within
   a tolerance of 2*m*m*DBL_EPSILON.  Returns the number of mismatches
   (0 == success). */
int check(int m, int n, double (*C)[n], double tvalue)
{
    int errors = 0;
    const double tolerance = 2.0*m*m*DBL_EPSILON;

    /* Each process owns a single tile, so one block check covers it
       (the original 1x1 loop nest is collapsed). */
    check_block( m, n, C, tvalue, tolerance, &errors );

    return errors;
}
40 |
41 |
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/gendat.c:
--------------------------------------------------------------------------------
1 | #include "matmul.h"
2 | #include "layouts.h"
3 | #include
4 |
5 |
//#pragma omp target device (smp) copy_deps
//#pragma omp task output(A)
/* Fill every entry of the m x n tile A with the same value. */
void init_tile (int m, int n, double (*A)[n], double Value )
{
    int row, col;

    for ( row = 0; row < m; ++row )
        for ( col = 0; col < n; ++col )
            A[row][col] = Value;
}
17 |
/* Initialise the input matrices: A and B are set to all ones, C to zeros.
 * Note the swapped (n, m) extents for A, matching its (*A)[m] layout. */
void gendat(int m, int n, double (*A)[m], double (*B)[n], double (*C)[n])
{
    /* Single-tile layout: the original 1x1 block loops ran exactly once. */
    init_tile( n, m, A, 1.0 );
    init_tile( m, n, B, 1.0 );
    init_tile( m, n, C, 0.0 );
}
33 |
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/matmul.c:
--------------------------------------------------------------------------------
1 | #include "bsize.h"
2 | #include "matmul.h"
3 | #include "layouts.h"
4 | #include
5 | #include
6 | #include
7 | #include
8 | #include
9 |
10 | #include
11 |
12 |
13 | // MPI info. Global Variables. Invariant during whole execution
14 | extern int me;
15 | extern int nodes;
16 |
17 | void matmul ( int m, int n, double (*A)[m], double (*B)[n], double (*C)[n] )
18 | {
19 | double (*a)[m];
20 | double (*rbuf)[m];
21 | double (*orig_rbuf)[m];
22 | void *ptmp;
23 | int up, down;
24 | int i;
25 | int it;
26 | int tag = 1000;
27 | int size = m*n;
28 | MPI_Status stats;
29 |
30 | orig_rbuf = rbuf = (double (*)[m])malloc(m*n*sizeof(double));
31 | if (nodes >1) {
32 | up = me0 ? me-1:nodes-1;
34 | } else {
35 | up = down = MPI_PROC_NULL;
36 | }
37 |
38 | a=A;
39 | i = n*me; // first C block (different for each process)
40 | size = m*n;
41 |
42 |
43 | for( it = 0; it < nodes; it++ ) {
44 |
45 | #pragma omp task in (a[0:n-1], B[0:m-1]) inout (C[i:i+n-1][0:n-1]) firstprivate (n,m)
46 | cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, m, 1.0, (double *)a, m, (double *)B, n, 1.0, (double *)&C[i][0], n);
47 |
48 | if (it < nodes-1) {
49 | #pragma omp task in (a[0:n-1]) out (rbuf[0:n-1]) inout(stats) firstprivate (size,m,n,tag,down,up)
50 | MPI_Sendrecv( a, size, MPI_DOUBLE, down, tag, rbuf, size, MPI_DOUBLE, up, tag, MPI_COMM_WORLD, &stats );
51 | }
52 |
53 | i = (i+n)%m; //next C block circular
54 | ptmp=a; a=rbuf; rbuf=ptmp; //swap pointers
55 | }
56 |
57 | #pragma omp taskwait
58 | free (orig_rbuf);
59 | }
60 |
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/matmul.h:
--------------------------------------------------------------------------------
1 | #ifndef __CUDACC__
2 |
3 | void matmul ( int m, int n, double (*A)[m], double (*B)[n], double (*C)[n] );
4 |
5 | #endif // __CUDACC__
6 |
7 | #ifdef __cplusplus
8 | extern "C" {
9 | #endif
10 |
11 | #ifdef __CUDACC__
12 |
13 | void dgemm_cublas (int BS, double *A, double *B, double *C);
14 |
15 | #else
16 |
17 | extern int BS;
18 |
19 | //#pragma omp target device (cuda) copy_deps
20 | //#pragma omp task input([n][m]A, [m][n]B) inout([m][n]C)
21 | //void dgemm_cublas (int m, int n, double *A, double *B, double *C);
22 |
23 |
24 | #endif
25 |
26 | #ifdef __cplusplus
27 | }
28 | #endif
29 |
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/prthead.c:
--------------------------------------------------------------------------------
1 | #include
2 |
/* Print the banner and column headings for the matmul timing table. */
void prthead( int nodes )
{
    fputs( "matmul: Matrix-matrix multiply test C(m,n) = A(m,l)*B(l,n)\n", stdout );
    printf ("Number of MPI processes: %d\n", nodes);
    fputs( "----------------------------------------------------------\n", stdout );
    fputs( " Problem size | | | |\n", stdout );
    fputs( " m | l | n | Time (s) | (Gflop/s) | OK? |\n", stdout );
    fputs( "----------------------------------------------------------\n", stdout );
    fflush(stdout);   /* make the header visible before any results */
}
14 |
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/prtspeed.c:
--------------------------------------------------------------------------------
1 | #include
2 |
3 | #define max(a,b)( ((a) > (b)) ? (a) : (b) )
4 |
/*
 * prtspeed: print one result row — problem sizes, elapsed time, achieved
 * Gflop/s and a pass ("T") / fail ("F") flag.
 *
 * ok   : number of wrong elements reported by check(); 0 means correct.
 * nops : floating-point operation count for the run.
 */
void prtspeed( int m, int l, int n, double time, int ok, unsigned long nops )
{
    double speed;
    // -----------------------------------------------------------------
    /* Clamp the divisor so a zero (or absurdly small) measured time does
     * not divide by zero — same guard the commented-out formula used. */
    speed = 1.0e-9*nops/( time > 1.0e-9 ? time : 1.0e-9 );

    printf( "%d\t%d\t%d\t%.4lf\t %.4lf ", m, l, n, time, speed );
    if ( ok == 0 )
        printf( " T |\n" );
    else
        printf( " F (%d)|\n", ok );

    fflush(stdout);
}
22 |
--------------------------------------------------------------------------------
/04-mpi+ompss/matmul/test.in:
--------------------------------------------------------------------------------
1 | 1024 1024 2
2 | 2048 2048 1
3 | 2048 2048 1
4 | 4096 4096 1
5 |
--------------------------------------------------------------------------------
/05-ompss+dlb/README.rst:
--------------------------------------------------------------------------------
1 | OmpSs+DLB Exercises
2 | *******************
3 |
4 | .. toctree::
5 | :maxdepth: 2
6 | :numbered:
7 |
8 | pils/README.rst
9 | lulesh/README.rst
10 | lub/README.rst
11 | pils-multiapp/README.rst
12 |
--------------------------------------------------------------------------------
/05-ompss+dlb/lub/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=LUB-p
2 |
3 | # Uncomment to enable DLB
4 | # export NX_ARGS+=" --thread-manager=dlb"
5 | # export DLB_ARGS+=" --policy=auto_LeWI_mask --lend-mode=BLOCK"
6 | # export OMPSSEE_LD_PRELOAD=$DLB_HOME/lib/libdlb_mpi.so
7 |
8 | export NX_ARGS+=" --force-tie-master --warmup-threads"
9 |
10 | for i in $(seq 1 3) ; do
11 | mpirun env LD_PRELOAD=$OMPSSEE_LD_PRELOAD ./$PROGRAM 8000 100 | grep 'time to compute'
12 | done
13 |
--------------------------------------------------------------------------------
/05-ompss+dlb/lub/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=LUB-i
2 |
3 | # Uncomment to instrument
4 | # export INST=./trace.sh
5 |
6 | # Uncomment to enable DLB
7 | # export NX_ARGS+=" --thread-manager=dlb"
8 | # export DLB_ARGS+=" --policy=auto_LeWI_mask --lend-mode=BLOCK"
9 | # export OMPSSEE_LD_PRELOAD=$DLB_HOME/lib/libdlb_mpi_instr.so
10 |
11 | export NX_ARGS+=" --force-tie-master --warmup-threads"
12 |
13 | mpirun $INST ./$PROGRAM 2000 100
14 |
15 |
--------------------------------------------------------------------------------
/05-ompss+dlb/lub/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM = LUB
2 | PREFIX = .
3 |
4 | TARGETS = $(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
5 |
6 | JOB_SCHED_VERSION = -mpi
7 |
8 | BASE_DIR = ../..
9 | include $(BASE_DIR)/common-files/Makefile
10 |
11 | CC = smpcc
12 |
13 | CFLAGS = --ompss
14 | CFLAGS_P =
15 | CFLAGS_I = --instrument
16 | CFLAGS_D = --debug -DCHECK_RESULT
17 |
18 | LIBS = -lm
19 | INCS =
20 |
21 | EXTRA =
22 |
23 | RM = rm -f
24 |
25 | SOURCES = LUB.c
26 |
27 | all: $(TARGETS)
28 |
29 | $(PROGRAM)-p: $(SOURCES)
30 | $(MPI_NATIVE_CC)=$(CC) \
31 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
32 |
33 | $(PROGRAM)-i: $(SOURCES)
34 | $(MPI_NATIVE_CC)=$(CC) \
35 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
36 |
37 | $(PROGRAM)-d: $(SOURCES)
38 | $(MPI_NATIVE_CC)=$(CC) \
39 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
40 |
41 | clean:
42 | $(RM) $(CC)_* *.o *~ $(TARGETS)
43 |
--------------------------------------------------------------------------------
/05-ompss+dlb/lub/README.rst:
--------------------------------------------------------------------------------
1 | LUB
2 | ---
3 |
4 | .. highlight:: none
5 |
6 | LUB is an LU matrix decomposition by blocks
7 |
8 | Usage::
9 |
  10 |     ./LUB <matrix-size> <block-size>
11 |
12 | **Goals of this exercise**
13 |
14 | * Run the instrumented version of LUB and analyse the Paraver trace.
15 | * Enable DLB options. Run and analyse the Paraver trace.
16 | * Run the multirun.sh script and compare the execution performance with and without DLB.
17 |
--------------------------------------------------------------------------------
/05-ompss+dlb/lulesh/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=lulesh2.0-p
2 |
3 | # Uncomment to enable DLB
4 | # export NX_ARGS+=" --thread-manager=dlb"
5 | # export DLB_ARGS+=" --policy=auto_LeWI_mask --lend-mode=BLOCK"
6 | # export OMPSSEE_LD_PRELOAD=$DLB_HOME/lib/libdlb_mpi.so
7 | # export I_MPI_WAIT_MODE=1
8 |
9 | export NX_ARGS+=" --force-tie-master --warmup-threads"
10 |
11 | for i in $(seq 1 3) ; do
12 | mpirun -n 27 env LD_PRELOAD=$OMPSSEE_LD_PRELOAD ./$PROGRAM -i 15 -b 8 -s 100 \
13 | | tac | grep -m 1 'Elapsed time'
14 | done
15 |
--------------------------------------------------------------------------------
/05-ompss+dlb/lulesh/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=lulesh2.0-i
2 |
3 | # Uncomment to instrument
4 | # export INST=./trace.sh
5 |
6 | # Uncomment to enable DLB
7 | # export NX_ARGS+=" --thread-manager=dlb"
8 | # export DLB_ARGS+=" --policy=auto_LeWI_mask --lend-mode=BLOCK"
9 | # export OMPSSEE_LD_PRELOAD=$DLB_HOME/lib/libdlb_mpi_instr.so
10 | # export I_MPI_WAIT_MODE=1
11 |
12 | export NX_ARGS+=" --force-tie-master --warmup-threads"
13 |
14 | mpirun -n 27 $INST ./$PROGRAM -i 5 -b 8 -s 100
15 |
--------------------------------------------------------------------------------
/05-ompss+dlb/lulesh/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM = lulesh2.0
2 | PREFIX = .
3 |
4 | TARGETS = $(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
5 |
6 | JOB_SCHED_VERSION = -mpi
7 |
8 | BASE_DIR = ../..
9 | include $(BASE_DIR)/common-files/Makefile
10 |
11 | CXX = smpcxx
12 |
13 | CFLAGS = --ompss
14 | CFLAGS_P =
15 | CFLAGS_I = --instrument
16 | CFLAGS_D = --debug
17 |
18 | LIBS = -lm
19 | INCS =
20 |
21 | EXTRA = -DUSE_MPI=1
22 |
23 | RM = rm -f
24 |
25 | SOURCES = lulesh.cc lulesh-comm.cc lulesh-viz.cc lulesh-util.cc lulesh-init.cc
26 |
27 | all: $(TARGETS)
28 |
29 | $(PROGRAM)-p: $(SOURCES)
30 | $(MPI_NATIVE_CXX)=$(CXX) \
31 | $(MPICXX) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
32 |
33 | $(PROGRAM)-i: $(SOURCES)
34 | $(MPI_NATIVE_CXX)=$(CXX) \
35 | $(MPICXX) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
36 |
37 | $(PROGRAM)-d: $(SOURCES)
38 | $(MPI_NATIVE_CXX)=$(CXX) \
39 | $(MPICXX) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
40 |
41 | clean:
42 | $(RM) $(CC)_* *.o *~ $(TARGETS)
43 |
--------------------------------------------------------------------------------
/05-ompss+dlb/lulesh/README.rst:
--------------------------------------------------------------------------------
1 | Lulesh
2 | ------
3 |
4 | .. highlight:: none
5 |
6 | Lulesh is a benchmark from LLNL, it represents a typical hydrocode like ALE3D.
7 |
8 | Usage::
9 |
  10 |     ./lulesh2.0 -i <iterations> -b <balance> -s <size>
11 |
12 |
13 | **Goals of this exercise**
14 |
15 | * Run the instrumented version of Lulesh and analyse the Paraver trace.
16 | * Enable DLB options, MPI interception included. Run and analyse the Paraver trace.
17 | * Run the multirun.sh script and compare the execution performance with and without DLB.
18 |
--------------------------------------------------------------------------------
/05-ompss+dlb/pils-multiapp/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=ompss_pils-p
2 |
--------------------------------------------------------------------------------
/05-ompss+dlb/pils-multiapp/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=ompss_pils-i
2 |
3 | # Uncomment to instrument
4 | # export INST=./trace-multiapp.sh
5 |
6 | # Uncomment to enable DLB
7 | # export NX_ARGS+=" --thread-manager=dlb"
8 | # export DLB_ARGS+=" --policy=auto_LeWI_mask"
9 |
10 | export NX_ARGS+=" --warmup-threads"
11 |
12 | export TRACEID=TRACE1
13 | taskset -c 0-7 $INST ./$PROGRAM input1 1 100 500 &
14 |
15 | export TRACEID=TRACE2
16 | taskset -c 8-15 $INST ./$PROGRAM input2 1 100 50 &
17 |
18 | wait
19 |
20 | if [[ -n "$INST" ]] ; then
21 | mpi2prv -f TRACE1.mpits -- -f TRACE2.mpits -o myTrace.prv
22 | fi
23 |
--------------------------------------------------------------------------------
/05-ompss+dlb/pils-multiapp/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM = ompss_pils
2 | PREFIX = .
3 |
4 | TARGETS = $(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
5 |
6 | JOB_SCHED_VERSION = -smp
7 |
8 | BASE_DIR = ../..
9 | include $(BASE_DIR)/common-files/Makefile
10 |
11 | CC = smpcc
12 |
13 | CFLAGS = --ompss
14 | CFLAGS_P =
15 | CFLAGS_I = --instrument
16 | CFLAGS_D = --debug
17 |
18 | LIBS =
19 | INCS =
20 |
21 | EXTRA = -std=c99 -Wall -Wno-unused
22 |
23 | RM = rm -f
24 |
25 | SOURCES = ompss_pils.c
26 |
27 | all: $(TARGETS)
28 |
29 | $(PROGRAM)-p: $(SOURCES)
30 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
31 |
32 | $(PROGRAM)-i: $(SOURCES)
33 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
34 |
35 | $(PROGRAM)-d: $(SOURCES)
36 | $(CC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
37 |
38 | clean:
39 | $(RM) $(CC)_* *.o *~ $(TARGETS)
40 |
--------------------------------------------------------------------------------
/05-ompss+dlb/pils-multiapp/README.rst:
--------------------------------------------------------------------------------
1 | PILS - multiapp example
2 | -----------------------
3 |
4 | .. highlight:: c
5 |
6 | This example demonstrates the capabilities of DLB sharing resources with two different
7 | unrelated applications. The run-once.sh script executes two instances of PILS without
8 | MPI support, each one in a different set of CPUs. DLB is able to automatically lend
9 | resources from one to another.
10 |
11 | **Goals of this exercise**
12 |
13 | * Run the script run-once.sh with tracing and DLB enabled, and observe how two
14 | unrelated applications share resources.
15 |
--------------------------------------------------------------------------------
/05-ompss+dlb/pils-multiapp/extrae-multiapp.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | 1-3
18 | 1-5
19 | 1-3
20 |
21 |
22 |
23 | $TRACEID$
24 | 5
25 |
26 |
27 |
28 |
29 |
30 |
31 | 5000000
32 |
33 |
34 |
35 |
36 |
37 |
38 | PAPI_TOT_INS,PAPI_TOT_CYC,PAPI_L1_DCM
39 | PAPI_TOT_CYC
40 |
41 |
42 | PAPI_TOT_INS,PAPI_FP_INS,PAPI_TOT_CYC
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
56 | my_trace.prv
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/05-ompss+dlb/pils-multiapp/input1:
--------------------------------------------------------------------------------
1 | 20
2 |
--------------------------------------------------------------------------------
/05-ompss+dlb/pils-multiapp/input2:
--------------------------------------------------------------------------------
1 | 200
2 |
--------------------------------------------------------------------------------
/05-ompss+dlb/pils-multiapp/trace-multiapp.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | export EXTRAE_CONFIG_FILE=extrae-multiapp.xml
4 | export NX_INSTRUMENTATION=extrae
5 |
6 | $*
7 |
--------------------------------------------------------------------------------
/05-ompss+dlb/pils/.config/multirun.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=mpi_ompss_pils-p
2 |
3 | # Uncomment to enable DLB
4 | # export NX_ARGS+=" --thread-manager=dlb"
5 | # export DLB_ARGS+=" --policy=auto_LeWI_mask"
6 |
7 | export NX_ARGS+=" --force-tie-master --warmup-threads"
8 |
9 | for i in $(seq 1 3) ; do
10 | mpirun ./$PROGRAM /dev/null 1 10 500 | grep 'Application time'
11 | done
12 |
--------------------------------------------------------------------------------
/05-ompss+dlb/pils/.config/run-once.sh:
--------------------------------------------------------------------------------
1 | PROGRAM=mpi_ompss_pils-i
2 |
3 | # Uncomment to instrument
4 | # export INST=./trace.sh
5 |
6 | # Uncomment to enable DLB
7 | # export NX_ARGS+=" --thread-manager=dlb"
8 | # export DLB_ARGS+=" --policy=auto_LeWI_mask"
9 |
10 | # Uncomment to enable DLB MPI interception
11 | # export OMPSSEE_LD_PRELOAD=$DLB_HOME/lib/libdlb_mpi_instr.so
12 |
13 | export NX_ARGS+=" --force-tie-master --warmup-threads"
14 |
15 | mpirun $INST ./$PROGRAM /dev/null 1 5 500
16 |
--------------------------------------------------------------------------------
/05-ompss+dlb/pils/Makefile:
--------------------------------------------------------------------------------
1 | PROGRAM = mpi_ompss_pils
2 | PREFIX = .
3 |
4 | TARGETS = $(PROGRAM)-p $(PROGRAM)-i $(PROGRAM)-d
5 |
6 | JOB_SCHED_VERSION = -mpi
7 |
8 | BASE_DIR = ../..
9 | include $(BASE_DIR)/common-files/Makefile
10 |
11 | CC = smpcc
12 |
13 | CFLAGS = --ompss
14 | CFLAGS_P =
15 | CFLAGS_I = --instrument
16 | CFLAGS_D = --debug
17 |
18 | LIBS = -lm
19 | INCS =
20 |
21 | EXTRA = -std=c99 -Wall -Wno-unused
22 |
23 | RM = rm -f
24 |
25 | SOURCES = mpi_ompss_pils.c
26 |
27 | all: $(TARGETS)
28 |
29 | $(PROGRAM)-p: $(SOURCES)
30 | $(MPI_NATIVE_CC)=$(CC) \
31 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_P) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
32 |
33 | $(PROGRAM)-i: $(SOURCES)
34 | $(MPI_NATIVE_CC)=$(CC) \
35 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_I) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
36 |
37 | $(PROGRAM)-d: $(SOURCES)
38 | $(MPI_NATIVE_CC)=$(CC) \
39 | $(MPICC) $(GFLAGS) $(CFLAGS) $(CFLAGS_D) $(EXTRA) $(INCS) -o $@ $^ $(LIBS)
40 |
41 | clean:
42 | $(RM) $(CC)_* *.o *~ $(TARGETS)
43 |
--------------------------------------------------------------------------------
/05-ompss+dlb/pils/README.rst:
--------------------------------------------------------------------------------
1 | PILS (Parallel ImbaLance Simulator)
2 | -----------------------------------
3 |
4 | .. highlight:: none
5 |
6 | PILS is an MPI+OpenMP/OmpSs synthetic benchmark that measures the execution time
7 | of imbalanced MPI ranks.
8 |
9 | Usage::
10 |
  11 |     ./mpi_ompss_pils <loads-file> <parallel-grain> <loops> <task_size>
12 | loads-file: file with load balance (number of tasks per iteration) per process, [100, 250] if /dev/null
13 | parallel-grain: parallelism grain, factor between 0..1 to apply sub-blocking techniques
14 | loops: number of execution loops
15 | task_size: factor to increase task size
16 |
17 | **Goals of this exercise**
18 |
19 | * Run the instrumented version of PILS and generate a Paraver trace.
20 |
21 | * Analyse the load imbalance between MPI ranks.
22 |
23 | * Enable DLB and compare both executions.
24 |
25 | * Observe the dynamic thread creation when other processes suffer load imbalance.
26 | * Analyse the load imbalance of the new execution. Does it improve?
27 |
28 | * Enable DLB MPI interception and trace again. Analyse the new trace.
29 | * Run the multirun.sh script and compare the execution performance with and without DLB.
30 | * Modify the inputs of PILS to reduce load imbalance and see when DLB stops improving performance.
31 |
32 |
--------------------------------------------------------------------------------
/common-files/Makefile:
--------------------------------------------------------------------------------
1 | COMMON_DIR=$(BASE_DIR)/common-files
2 | CONFIG_DIR=.config
3 |
4 | GFLAGS=
5 |
6 | PARAVER=extrae.xml trace.sh
7 | GRAPH=graph.sh
8 | SCRIPTS=run-once.sh multirun.sh
9 |
10 | MPI_CHECK=$(MPI_LIB_DIR)/libmpi.so
11 | MKL_CHECK=$(MKL_LIB_DIR)/libmkl_sequential.so
12 | ATLAS_CHECK=$(ATLAS_LIB_DIR)/libatlas.a
13 |
14 | all: $(TARGETS) $(SCRIPTS) $(PARAVER) $(GRAPH)
15 |
16 | extrae.xml:
17 | cp $(COMMON_DIR)/extrae.xml .
18 |
19 | trace.sh:
20 | cp $(COMMON_DIR)/trace$(TRACE_VERSION).sh trace.sh
21 |
22 | graph.sh:
23 | cp $(COMMON_DIR)/graph.sh .
24 |
25 | run-once.sh: $(COMMON_DIR)/run-once.sh $(CONFIG_DIR)/run-once.sh
26 | cp $(COMMON_DIR)/run-once.sh .
27 | cat $(COMMON_DIR)/sched-job$(JOB_SCHED_VERSION) >> run-once.sh
28 | cat $(CONFIG_DIR)/run-once.sh >> run-once.sh
29 |
30 | multirun.sh: $(COMMON_DIR)/multirun.sh $(CONFIG_DIR)/multirun.sh
31 | cp $(COMMON_DIR)/multirun.sh .
32 | cat $(COMMON_DIR)/sched-job$(JOB_SCHED_VERSION) >> multirun.sh
33 | cat $(CONFIG_DIR)/multirun.sh >> multirun.sh
34 |
35 | $(MPI_CHECK):
36 | @echo "=================================================="
37 | @echo "=== MPI LIBRARY NEEDED ==="
38 | @echo "=================================================="
39 | @false
40 |
41 | $(MKL_CHECK):
42 | @echo "=================================================="
43 | @echo "=== MKL LIBRARY NEEDED ==="
44 | @echo "=================================================="
45 | @false
46 |
47 | $(ATLAS_CHECK):
48 | @echo "=================================================="
49 | @echo "=== ATLAS LIBRARY NEEDED ==="
50 | @echo "=================================================="
51 | @false
52 |
53 | wipe: clean
54 | rm -f $(PARAVER)
55 | rm -f $(SCRIPTS)
56 | rm -f *.out *.err
57 | rm -f *.prv *.row *.pcf
58 | rm -f TRACE.mpits
59 | rm -f -r set-0
60 | rm -f graph.dot graph.pdf
61 |
--------------------------------------------------------------------------------
/common-files/configure_VirtualBox:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | DIRNAME=$(readlink -f $(dirname ${BASH_SOURCE[0]}))
4 |
5 | export OMPSS_HOME=/home/user/Builds/OmpSs/mcxx
6 | export EXTRAE_HOME=/home/user/Builds/extrae
7 | export PARAVER_HOME=/home/user/Tools/paraver
8 | export TEMANEJO_HOME=/home/user/Builds/temanejo
9 | export MPI_HOME=/usr/lib/openmpi
10 | export MPI_LIB_DIR=$MPI_HOME/lib
11 | export MPI_INC_DIR=$MPI_HOME/include
12 | export MPI_CC=mpicc
13 | export MPIRUN_COMMAND="mpirun"
14 | export MKL_LIB_DIR=/home/user/Builds/mkl/lib/intel64
15 | export MKL_INC_DIR=/home/user/Builds/mkl/include
16 | export ATLAS_LIB_DIR=/usr/lib
17 | export ATLAS_INC_DIR=/gpfs/apps/NVIDIA/ATLAS/3.9.51/include
18 |
19 | touch $DIRNAME/sched-job-smp
20 | touch $DIRNAME/sched-job-mpi
21 |
--------------------------------------------------------------------------------
/common-files/configure_default:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | DIRNAME=$(readlink -f $(dirname ${BASH_SOURCE[0]}))
4 |
5 | echo \ \ Using default configuration file:
6 | echo \ \ $BASH_SOURCE
7 |
8 | export OMPSS_HOME=
9 | export EXTRAE_HOME=
10 | export PARAVER_HOME=
11 | export TEMANEJO_HOME=
12 | export MPI_LIB_DIR=
13 | export MPI_INC_DIR=
14 | export MPIRUN_COMMAND="mpirun"
15 | export MKL_LIB_DIR=
16 | export MKL_INC_DIR=
17 | export ATLAS_LIB_DIR=/home/xteruel/Applications/atlas-3.10.3/lib/
18 | export ATLAS_INC_DIR=
19 |
20 | touch $DIRNAME/sched-job-smp
21 | touch $DIRNAME/sched-job-mpi
22 |
--------------------------------------------------------------------------------
/common-files/configure_mn4:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | DIRNAME=$(readlink -f $(dirname ${BASH_SOURCE[0]}))
4 |
5 | export OMPSS_HOME=/apps/PM/ompss/git
6 | export DLB_HOME=/apps/PM/dlb/latest/impi
7 | export EXTRAE_HOME=/apps/BSCTOOLS/extrae/latest/impi_2017_4
8 | export PARAVER_HOME=/apps/BSCTOOLS/wxparaver/latest
9 | export TEMANEJO_HOME=
10 | export MPI_HOME=/apps/INTEL/2017.4/impi/2017.3.196
11 | export MPI_LIB_DIR=$MPI_HOME/lib
12 | export MPI_INC_DIR=$MPI_HOME/include
13 | export MPICC=mpiicc
14 | export MPICXX=mpiicpc
15 | export MPI_NATIVE_CC=I_MPI_CC
16 | export MPI_NATIVE_CXX=I_MPI_CXX
17 | export MPIRUN_COMMAND="mpirun"
18 | export MKL_LIB_DIR=/apps/INTEL/2017.4/mkl/lib/intel64
19 | export MKL_INC_DIR=/apps/INTEL/2017.4/mkl/include
20 | export ATLAS_LIB_DIR=/apps/ATLAS/3.10.3/INTEL_BK/lib
21 | export ATLAS_INC_DIR=/apps/ATLAS/3.10.3/INTEL_BK/include
22 |
23 | ln -sf $DIRNAME/sched-job-smp_mn4 $DIRNAME/sched-job-smp
24 | ln -sf $DIRNAME/sched-job-mpi_mn4 $DIRNAME/sched-job-mpi
25 |
26 | module unload openmpi
27 | module load impi/2017.4
28 |
--------------------------------------------------------------------------------
/common-files/configure_nord3:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | DIRNAME=$(readlink -f $(dirname ${BASH_SOURCE[0]}))
4 |
5 | export OMPSS_HOME=/apps/PM/ompss/git
6 | export DLB_HOME=/apps/PM/dlb/latest/impi
7 | export EXTRAE_HOME=/apps/BSCTOOLS/extrae/latest/impi_5_1_3_210
8 | export PARAVER_HOME=/apps/BSCTOOLS/wxparaver/latest
9 | export TEMANEJO_HOME=
10 | export MPI_HOME=/apps/INTEL/2016.3.067/impi/5.1.3.210/intel64
11 | export MPI_LIB_DIR=$MPI_HOME/lib
12 | export MPI_INC_DIR=$MPI_HOME/include
13 | export MPICC=mpiicc
14 | export MPICXX=mpiicpc
15 | export MPI_NATIVE_CC=I_MPI_CC
16 | export MPI_NATIVE_CXX=I_MPI_CXX
17 | export MPIRUN_COMMAND="mpirun"
18 |
19 | # Note (vlopez); wxparaver needs glibcxx from gcc >= 5
20 | module load gcc/5.1.0
21 | module unload openmpi
22 | module load impi/5.1.3.210
23 | # Note (gmiranda): if you don't do this, mpiicc can't find icc. Fixme!
24 | module load intel/16.0.0
25 | export MKL_LIB_DIR=/opt/intel/mkl/lib/intel64/
26 | export MKL_INC_DIR=/opt/intel/mkl/include/
27 | export ATLAS_LIB_DIR=/apps/ATLAS/3.10.2/lib
28 | export ATLAS_INC_DIR=/apps/ATLAS/3.10.2/include
29 |
30 | ln -sf $DIRNAME/sched-job-smp_nord3 $DIRNAME/sched-job-smp
31 | ln -sf $DIRNAME/sched-job-mpi_nord3 $DIRNAME/sched-job-mpi
32 |
33 | # Python configuration (needed by Temanejo)
34 | module load python
35 |
--------------------------------------------------------------------------------
/common-files/configure_nvidia:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | DIRNAME=$(readlink -f $(dirname ${BASH_SOURCE[0]}))
4 |
5 | export OMPSS_HOME=/apps/PM/ompss/19.06/
6 | export DLB_HOME=/apps/PM/dlb/latest/bullxmpi
7 | export EXTRAE_HOME=/apps/BSCTOOLS/extrae/latest/impi_2017_1_132
8 | export PARAVER_HOME=/apps/BSCTOOLS/wxparaver/latest
9 | export TEMANEJO_HOME=
10 | export MPI_HOME=/apps/INTEL/2017.1-043/impi/2017.1.132
11 | export MPI_LIB_DIR=$MPI_HOME/lib64
12 | export MPI_INC_DIR=$MPI_HOME/include64
13 | export MPICC=mpiicc
14 | export MPICXX=mpiicpc
15 | export MPI_NATIVE_CC=I_MPI_CC
16 | export MPI_NATIVE_CXX=I_MPI_CXX
17 | export MPIRUN_COMMAND="srun --cpu_bind=cores"
18 | export MKL_LIB_DIR=/opt/compilers/intel/2016.3.067/mkl/lib/intel64/
19 | export MKL_INC_DIR=/opt/compilers/intel/2016.3.067/mkl/include
20 | export ATLAS_LIB_DIR=/gpfs/apps/NVIDIA/ATLAS/3.9.51/lib
21 | export ATLAS_INC_DIR=/gpfs/apps/NVIDIA/ATLAS/3.9.51/include/
22 |
23 | ln -sf $DIRNAME/sched-job-smp_nvidia $DIRNAME/sched-job-smp
24 | ln -sf $DIRNAME/sched-job-mpi_nvidia $DIRNAME/sched-job-mpi
25 |
26 | module unload gcc
27 | module load gcc/4.6.1
28 | module load GRAPHVIZ
29 | module unload bullxmpi
30 | module load impi/2017.1
31 | module unload cuda
32 | module load cuda/8.0
33 |
34 | alias submit=mnsubmit
35 | alias queue=mnq
36 |
--------------------------------------------------------------------------------
/common-files/extrae.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 | 1-3
18 | 1-5
19 | 1-3
20 |
21 |
22 |
23 | TRACE
24 | 5
25 |
26 |
27 |
28 |
29 |
30 |
31 | 5000000
32 |
33 |
34 |
35 |
36 |
37 |
38 | PAPI_TOT_INS,PAPI_TOT_CYC,PAPI_L1_DCM
39 | PAPI_TOT_CYC
40 |
41 |
42 | PAPI_TOT_INS,PAPI_FP_INS,PAPI_TOT_CYC
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
56 | my_trace.prv
57 |
58 |
59 |
60 |
--------------------------------------------------------------------------------
/common-files/graph.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Instrumentation to generate a task dependency graph
4 | export NX_INSTRUMENTATION=tdg
5 |
6 | $*
7 |
--------------------------------------------------------------------------------
/common-files/multirun.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
--------------------------------------------------------------------------------
/common-files/paraver:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | module load paraver
4 | wxparaver $*
5 |
--------------------------------------------------------------------------------
/common-files/run-once.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
--------------------------------------------------------------------------------
/common-files/sched-job-mpi_mn4:
--------------------------------------------------------------------------------
1 | #SBATCH --job-name=ompss-ee
2 | #SBATCH --workdir=.
3 | #SBATCH --output=ompss-ee_%j.out
4 | #SBATCH --error=ompss-ee_%j.err
5 | #SBATCH --cpus-per-task=24
6 | #SBATCH --ntasks=2
7 | #SBATCH --time=00:15:00
8 | #SBATCH --qos=debug
9 |
--------------------------------------------------------------------------------
/common-files/sched-job-mpi_nord3:
--------------------------------------------------------------------------------
1 | #BSUB -n 16
2 | #BSUB -R "span[ptile=8]"
3 | #BSUB -oo ompss-ee_%J.out
4 | #BSUB -eo ompss-ee_%J.err
5 | ##BSUB -U patc5
6 | #BSUB -J ompss-ee
7 | #BSUB -W 00:15
8 | #BSUB -x
9 |
10 |
--------------------------------------------------------------------------------
/common-files/sched-job-mpi_nvidia:
--------------------------------------------------------------------------------
1 | # @ job_name = ompss-ee
2 | # @ partition = debug
3 | ## @ reservation =
4 | # @ initialdir = .
5 | # @ output = ompss-ee_%j.out
6 | # @ error = ompss-ee_%j.err
7 | # @ total_tasks = 2
8 | # @ gpus_per_node = 2
9 | # @ cpus_per_task = 6
10 | # @ node_usage = not_shared
11 | # @ features = k80
12 | # @ wall_clock_limit = 00:15:00
13 |
--------------------------------------------------------------------------------
/common-files/sched-job-smp_mn4:
--------------------------------------------------------------------------------
1 | #SBATCH --job-name=ompss-ee
2 | #SBATCH --workdir=.
3 | #SBATCH --output=ompss-ee_%j.out
4 | #SBATCH --error=ompss-ee_%j.err
5 | #SBATCH --cpus-per-task=48
6 | #SBATCH --ntasks=1
7 | #SBATCH --time=00:15:00
8 | #SBATCH --qos=debug
9 |
--------------------------------------------------------------------------------
/common-files/sched-job-smp_nord3:
--------------------------------------------------------------------------------
1 | #BSUB -n 16
2 | #BSUB -R "span[ptile=16]"
3 | #BSUB -oo ompss-ee_%J.out
4 | #BSUB -eo ompss-ee_%J.err
5 | ##BSUB -U patc5
6 | #BSUB -J ompss-ee
7 | #BSUB -W 00:15
8 | #BSUB -x
9 |
10 |
--------------------------------------------------------------------------------
/common-files/sched-job-smp_nvidia:
--------------------------------------------------------------------------------
1 | # @ job_name = ompss-ee
2 | # @ partition = debug
3 | ## @ reservation =
4 | # @ initialdir = .
5 | # @ output = ompss-ee_%j.out
6 | # @ error = ompss-ee_%j.err
7 | # @ total_tasks = 1
8 | # @ gpus_per_node = 2
9 | # @ cpus_per_task = 12
10 | # @ node_usage = not_shared
11 | # @ features = k80
12 | # @ wall_clock_limit = 00:15:00
13 |
--------------------------------------------------------------------------------
/common-files/trace-mpi.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Uncomment the following line to trace MPI+OmpSs programs
4 | export LD_PRELOAD=${EXTRAE_HOME}/lib/libnanosmpitrace.so
5 |
6 | # Uncomment the following line to trace MPI+OpenMP (GNU) programs
7 | #export LD_PRELOAD=${EXTRAE_HOME}/lib/libompitrace.so
8 |
9 | export LD_PRELOAD="$LD_PRELOAD:$OMPSSEE_LD_PRELOAD"
10 |
11 | export EXTRAE_CONFIG_FILE=extrae.xml
12 | export NX_INSTRUMENTATION=extrae
13 |
14 | $*
15 |
16 |
--------------------------------------------------------------------------------
/common-files/trace.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Uncomment the following line to trace MPI+OmpSs programs
4 | #export LD_PRELOAD=${EXTRAE_HOME}/lib/libnanosmpitrace.so
5 |
6 | # Uncomment the following line to trace MPI+OpenMP (GNU) programs
7 | #export LD_PRELOAD=${EXTRAE_HOME}/lib/libompitrace.so
8 |
9 | export LD_PRELOAD="$LD_PRELOAD:$OMPSSEE_LD_PRELOAD"
10 |
11 | export EXTRAE_CONFIG_FILE=extrae.xml
12 | export NX_INSTRUMENTATION=extrae
13 |
14 | $*
15 |
16 | mpi2prv -f TRACE.mpits -o myTrace.prv
17 |
--------------------------------------------------------------------------------
/paraver-cfgs/cluster/network_transfers_and_bw.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Network transfers and Recv Bandwidth (MB/s) >
7 | ################################################################################
8 | window_name Network transfers and Recv Bandwidth (MB/s)
9 | window_type single
10 | window_id 1
11 | window_position_x 1440
12 | window_position_y 362
13 | window_width 838
14 | window_height 307
15 | window_comm_lines_enabled true
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_color_mode window_in_null_gradient_mode
19 | window_logical_filtered true
20 | window_physical_filtered false
21 | window_comm_fromto true
22 | window_comm_tagsize true
23 | window_comm_typeval true
24 | window_units Microseconds
25 | window_maximum_y 5080.783383360959
26 | window_minimum_y 0.504376358863
27 | window_compute_y_max false
28 | window_level thread
29 | window_scale_relative 1.000000000000
30 | window_end_time_relative 1.000000000000
31 | window_object appl { 1, { All } }
32 | window_begin_time_relative 0.000000000000
33 | window_open true
34 | window_drawmode 1
35 | window_drawmode_rows 1
36 | window_pixel_size 1
37 | window_labels_to_draw 1
38 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Recv BandWidth}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
39 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
40 | window_semantic_module thread Recv BandWidth { 1, { 1 1.000000000000 } }
41 | window_filter_module tag_msg 1 3
42 | window_filter_module evt_type 1 9200011
43 |
44 |
--------------------------------------------------------------------------------
/paraver-cfgs/general/sanity_checks/flushing.cfg:
--------------------------------------------------------------------------------
1 | version 3.3
2 | number_of_windows 1
3 | begin_description
4 | Dark blue indicates that OMPItrace was flushing the trace buffer to disk. This may result in a strong perturbation of the trace.
5 | end_description
6 |
7 | ################################################################################
8 | < NEW DISPLAYING WINDOW Flushing >
9 | ################################################################################
10 | window_name Flushing
11 | window_type single
12 | window_id 1
13 | window_position_x 275
14 | window_position_y 267
15 | window_width 600
16 | window_height 115
17 | window_comm_lines_enabled false
18 | window_flags_enabled true
19 | window_maximum_y 34.000000
20 | window_scale_relative 1.000000
21 | window_object appl { 1, { All } }
22 | window_begin_time_relative 0.000000000000
23 | window_pos_to_disp 598
24 | window_pos_of_x_scale 18
25 | window_pos_of_y_scale 85
26 | window_number_of_row 128
27 | window_click_options 0 0 1 0 0 0
28 | window_click_info 0 412653 412845 0 412749
29 | window_expanded false
30 | window_open false
31 | window_drawmode 1
32 | window_drawmode_rows 1
33 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
34 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
35 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
36 | window_compose_functions { 2, { {compose1, As Is}, {compose2, As Is} } }
37 | window_semantic_module compose1 Prod { 1, { 1 1.000000 } }
38 | window_semantic_module compose1 Is In Range { 2, { 1 32.000000, 1 32.000000 } }
39 | window_semantic_module compose2 Prod { 1, { 1 1.000000 } }
40 | window_analyzer_executed 1
41 | window_analyzer_info 0.000000 31461579357.000000 1 128
42 | window_filter_module evt_type 1 40000003
43 |
--------------------------------------------------------------------------------
/paraver-cfgs/general/views/instantaneous_parallelism.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 | begin_description
4 | Instantaneous parallelism
5 |
6 |
7 | end_description
8 |
9 | ################################################################################
10 | < NEW DISPLAYING WINDOW Parallelism profile >
11 | ################################################################################
12 | window_name Parallelism profile
13 | window_type single
14 | window_id 1
15 | window_position_x 629
16 | window_position_y 337
17 | window_width 600
18 | window_height 147
19 | window_comm_lines_enabled false
20 | window_noncolor_mode false
21 | window_maximum_y 512.000000
22 | window_minimum_y 1.000000
23 | window_level appl
24 | window_scale_relative 1.000000
25 | window_object appl { 1, { 1 } }
26 | window_begin_time_relative 0.000000000000
27 | window_pos_to_disp 597
28 | window_pos_of_x_scale 18
29 | window_pos_of_y_scale 135
30 | window_number_of_row 1
31 | window_click_options 1 0 1 1 1 0
32 | window_click_info 0 1536415172456 1538138010346 35 1537276591401
33 | window_expanded false
34 | window_open false
35 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Useful}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } }
36 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
37 | window_analyzer_executed 0
38 | window_analyzer_info 0.000000 0.000000 0 0
39 |
--------------------------------------------------------------------------------
/paraver-cfgs/general/views/not_useful.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW Not Useful >
6 | ################################################################################
7 | window_name Not Useful
8 | window_type single
9 | window_id 1
10 | window_position_x 387
11 | window_position_y 287
12 | window_width 600
13 | window_height 147
14 | window_comm_lines_enabled false
15 | window_scale_relative 0.955252
16 | window_object appl { 1, { All } }
17 | window_begin_time_relative 0.000000000000
18 | window_pos_to_disp 598
19 | window_pos_of_x_scale 18
20 | window_pos_of_y_scale 80
21 | window_number_of_row 16
22 | window_click_options 1 0 1 1 1 0
23 | window_click_info 0 1536415172456 1538138010346 35 1537276591401
24 | window_expanded false
25 | window_open false
26 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Useful}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } }
27 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, 1-Sign}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
28 | window_analyzer_executed 1
29 | window_analyzer_info 947824158.542278 64722849683.315544 1 16
30 |
--------------------------------------------------------------------------------
/paraver-cfgs/general/views/one.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW One >
6 | ################################################################################
7 | window_name One
8 | window_type single
9 | window_id 1
10 | window_position_x 390
11 | window_position_y 52
12 | window_width 600
13 | window_height 147
14 | window_comm_lines_enabled false
15 | window_maximum_y 16.000000
16 | window_minimum_y 1.000000
17 | window_scale_relative 1.000001
18 | window_object appl { 1, { All } }
19 | window_begin_time_relative 0.000000000000
20 | window_pos_to_disp 597
21 | window_pos_of_x_scale 18
22 | window_pos_of_y_scale 80
23 | window_number_of_row 32
24 | window_click_options 1 0 1 1 1 0
25 | window_click_info 1 1536415172456 1538138010346 35 1537276591401
26 | window_expanded false
27 | window_open false
28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Thread ID}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } }
29 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Sign}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
30 | window_analyzer_executed 0
31 | window_analyzer_info 0.000000 0.000000 0 0
32 |
--------------------------------------------------------------------------------
/paraver-cfgs/general/views/useful.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW Useful >
6 | ################################################################################
7 | window_name Useful
8 | window_type single
9 | window_id 1
10 | window_position_x 416
11 | window_position_y 96
12 | window_width 600
13 | window_height 147
14 | window_comm_lines_enabled false
15 | window_scale_relative 1.028112
16 | window_object appl { 1, { All } }
17 | window_begin_time_relative 0.000000000000
18 | window_pos_to_disp 583
19 | window_pos_of_x_scale 18
20 | window_pos_of_y_scale 85
21 | window_number_of_row 128
22 | window_click_options 1 0 1 1 1 0
23 | window_click_info 0 1536415172456 1538138010346 35 1537276591401
24 | window_expanded false
25 | window_open false
26 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Useful}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } }
27 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
28 | window_analyzer_executed 0
29 | window_analyzer_info 0.000000 0.000000 0 0
30 |
--------------------------------------------------------------------------------
/paraver-cfgs/general/views/useful_duration.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Useful Duration >
7 | ################################################################################
8 | window_name Useful Duration
9 | window_type single
10 | window_id 1
11 | window_position_x 501
12 | window_position_y 37
13 | window_width 600
14 | window_height 242
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_color_mode window_in_null_gradient_mode
19 | window_logical_filtered true
20 | window_physical_filtered true
21 | window_comm_fromto true
22 | window_comm_tagsize true
23 | window_comm_typeval true
24 | window_units Microseconds
25 | window_maximum_y 1505618.213999999900
26 | window_minimum_y 4.928000000000
27 | window_compute_y_max false
28 | window_level thread
29 | window_scale_relative 1.000000000000
30 | window_end_time_relative 1.000000000000
31 | window_object appl { 1, { All } }
32 | window_begin_time_relative 0.000000000000
33 | window_open true
34 | window_drawmode 3
35 | window_drawmode_rows 4
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, State Record Dur.}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, =}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 | window_semantic_module thread State Record Dur. { 1, { 1 1.000000000000 } }
39 |
40 |
--------------------------------------------------------------------------------
/paraver-cfgs/general/views/user_functions.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 | begin_description
4 | Color identifies the user function being executed by each thread
5 | end_description
6 |
7 | ################################################################################
8 | < NEW DISPLAYING WINDOW User function x thread >
9 | ################################################################################
10 | window_name User function x thread
11 | window_type single
12 | window_id 1
13 | window_position_x 375
14 | window_position_y 84
15 | window_width 600
16 | window_height 114
17 | window_comm_lines_enabled false
18 | window_compute_y_max
19 | window_minimum_y 10.000000
20 | window_scale_relative 1.000000
21 | window_object appl { 1, { All } }
22 | window_begin_time_relative 0.000000000000
23 | window_pos_to_disp 597
24 | window_pos_of_x_scale 18
25 | window_pos_of_y_scale 75
26 | window_number_of_row 9
27 | window_click_options 1 0 1 0 0 0
28 | window_click_info 1 21272293602 23774916378 4 22523604990
29 | window_expanded false
30 | window_open false
31 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
32 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
33 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
34 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
35 | window_analyzer_executed 0
36 | window_analyzer_info 0.000000 0.000000 0 0
37 | window_filter_module evt_type 1 60000019
38 |
--------------------------------------------------------------------------------
/paraver-cfgs/general/views/user_functions_duration.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW User function duration >
6 | ################################################################################
7 | window_name User function duration
8 | window_type single
9 | window_id 1
10 | window_position_x 381
11 | window_position_y 257
12 | window_width 601
13 | window_height 129
14 | window_comm_lines_enabled false
15 | window_color_mode window_in_null_gradient_mode
16 | window_maximum_y 99503731.571000
17 | window_level task
18 | window_scale_relative 1.000000
19 | window_object appl { 1, { All } }
20 | window_begin_time_relative 0.000000000000
21 | window_pos_to_disp 599
22 | window_pos_of_x_scale 18
23 | window_pos_of_y_scale 80
24 | window_number_of_row 64
25 | window_click_options 1 0 1 0 0 0
26 | window_click_info 0 8322982511 11007815579 5 9665399045
27 | window_expanded false
28 | window_open false
29 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Int. Between Evt}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
30 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
31 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
32 | window_compose_functions { 2, { {compose1, As Is}, {compose2, As Is} } }
33 | window_semantic_module compose1 Prod { 1, { 1 1.000000 } }
34 | window_semantic_module compose2 Prod { 1, { 1 1.000000 } }
35 | window_analyzer_executed 0
36 | window_analyzer_info 0.000000 0.000000 0 0
37 | window_filter_module evt_type 1 60000019
38 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/active_set.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW Active counter set >
6 | ################################################################################
7 | window_name Active counter set
8 | window_type single
9 | window_id 1
10 | window_position_x 105
11 | window_position_y 373
12 | window_width 600
13 | window_height 134
14 | window_physical_filtered true
15 | window_comm_lines_enabled false
16 | window_maximum_y 9223372036854775808.000000
17 | window_minimum_y 1.000000
18 | window_scale_relative 1.000000
19 | window_object appl { 1, { All } }
20 | window_begin_time_relative 0.000000000000
21 | window_pos_to_disp 598
22 | window_pos_of_x_scale 18
23 | window_pos_of_y_scale 140
24 | window_number_of_row 16
25 | window_click_options 1 0 1 0 1 0
26 | window_click_info 1 33226631649 35583130347 11 34404880998
27 | window_expanded false
28 | window_open false
29 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, =}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
30 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
31 | window_analyzer_executed 0
32 | window_analyzer_info 0.000000 0.000000 0 0
33 | window_filter_module evt_type 1 42009999
34 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/architecture/L3_misses.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW L3 cache misses >
6 | ################################################################################
7 | window_name L3 cache misses
8 | window_type single
9 | window_id 1
10 | window_position_x 408
11 | window_position_y 181
12 | window_width 600
13 | window_height 114
14 | window_comm_lines_enabled false
15 | window_color_mode window_in_null_gradient_mode
16 | window_maximum_y 240505.000000
17 | window_minimum_y 1.000000
18 | window_scale_relative 1.000000
19 | window_object appl { 1, { All } }
20 | window_begin_time_relative 0.000000000000
21 | window_pos_to_disp 597
22 | window_pos_of_x_scale 18
23 | window_pos_of_y_scale 80
24 | window_number_of_row 32
25 | window_click_options 1 0 1 0 0 0
26 | window_click_info 1 1151798560 1190191844 23 1170995202
27 | window_expanded false
28 | window_open false
29 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
30 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
31 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
32 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } }
33 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } }
34 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
35 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } }
36 | window_analyzer_executed 0
37 | window_analyzer_info 0.000000 0.000000 0 0
38 | window_filter_module evt_type 1 42000008
39 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/architecture/loaded_bytes.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Loaded Bytes >
7 | ################################################################################
8 | window_name Loaded Bytes
9 | window_type single
10 | window_id 1
11 | window_position_x 346
12 | window_position_y 58
13 | window_width 600
14 | window_height 114
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_color_mode window_in_null_gradient_mode
19 | window_logical_filtered true
20 | window_physical_filtered false
21 | window_comm_fromto true
22 | window_comm_tagsize true
23 | window_comm_typeval true
24 | window_units Microseconds
25 | window_maximum_y 210450432.000000000000
26 | window_minimum_y 128.000000000000
27 | window_compute_y_max false
28 | window_level thread
29 | window_scale_relative 0.999954755396
30 | window_end_time_relative 1.000000000000
31 | window_object appl { 1, { All } }
32 | window_begin_time_relative 0.000000000000
33 | window_open false
34 | window_drawmode 1
35 | window_drawmode_rows 1
36 | window_pixel_size 1
37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Prod}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
39 | window_semantic_module compose_thread Prod { 1, { 1 128.000000000000 } }
40 | window_filter_module evt_type 1 42000002
41 |
42 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/architecture/useful_loadad_bytes.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Useful Loaded Bytes >
7 | ################################################################################
8 | window_name Useful Loaded Bytes
9 | window_type single
10 | window_id 1
11 | window_position_x 627
12 | window_position_y 283
13 | window_width 600
14 | window_height 114
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_color_mode window_in_null_gradient_mode
19 | window_logical_filtered true
20 | window_physical_filtered false
21 | window_comm_fromto true
22 | window_comm_tagsize true
23 | window_comm_typeval true
24 | window_units Microseconds
25 | window_maximum_y 210450432.000000000000
26 | window_minimum_y 128.000000000000
27 | window_compute_y_max false
28 | window_level thread
29 | window_scale_relative 1.000000000000
30 | window_end_time_relative 1.000000000000
31 | window_object appl { 1, { All } }
32 | window_begin_time_relative 0.000000000000
33 | window_open false
34 | window_drawmode 1
35 | window_drawmode_rows 1
36 | window_pixel_size 1
37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Prod}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
39 | window_semantic_module compose_thread Prod { 1, { 1 128.000000000000 } }
40 | window_filter_module evt_type 1 42000002
41 |
42 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/mx_counters/nb_medium_msgs_sent.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW nb medium msgs sent >
6 | ################################################################################
7 | window_name nb medium msgs sent
8 | window_type single
9 | window_id 1
10 | window_position_x 293
11 | window_position_y 243
12 | window_width 600
13 | window_height 114
14 | window_comm_lines_enabled false
15 | window_color_mode window_in_null_gradient_mode
16 | window_maximum_y 1600.000000
17 | window_scale_relative 0.108983
18 | window_object appl { 1, { All } }
19 | window_begin_time_relative 0.000000000000
20 | window_pos_to_disp 598
21 | window_pos_of_x_scale 18
22 | window_pos_of_y_scale 80
23 | window_number_of_row 32
24 | window_click_options 0 0 1 0 0 0
25 | window_click_info 0 4628104571 5064718209 20 4846411390
26 | window_expanded false
27 | window_open false
28 | window_drawmode 0
29 | window_drawmode_rows 0
30 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
31 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
32 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
33 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } }
34 | window_analyzer_executed 0
35 | window_analyzer_info 0.000000 0.000000 0 0
36 | window_filter_module evt_type 1 11057
37 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/mx_counters/nb_rndv_msgs_sent.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW nb rndv msgs sent >
6 | ################################################################################
7 | window_name nb rndv msgs sent
8 | window_type single
9 | window_id 1
10 | window_position_x 293
11 | window_position_y 243
12 | window_width 600
13 | window_height 114
14 | window_comm_lines_enabled false
15 | window_flags_enabled true
16 | window_color_mode window_in_null_gradient_mode
17 | window_maximum_y 1600.000000
18 | window_scale_relative 1.000000
19 | window_object appl { 1, { All } }
20 | window_begin_time_relative 0.000000000000
21 | window_pos_to_disp 597
22 | window_pos_of_x_scale 18
23 | window_pos_of_y_scale 80
24 | window_number_of_row 32
25 | window_click_options 0 0 1 0 0 0
26 | window_click_info 1 2613055805 2618823827 18 2615939816
27 | window_expanded false
28 | window_open false
29 | window_drawmode 0
30 | window_drawmode_rows 0
31 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
32 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
33 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
34 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } }
35 | window_analyzer_executed 0
36 | window_analyzer_info 0.000000 0.000000 0 0
37 | window_filter_module evt_type 1 11058
38 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/mx_counters/nb_small_msgs_sent.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW nb small msgs sent >
6 | ################################################################################
7 | window_name nb small msgs sent
8 | window_type single
9 | window_id 1
10 | window_position_x 293
11 | window_position_y 243
12 | window_width 600
13 | window_height 114
14 | window_comm_lines_enabled false
15 | window_color_mode window_in_null_gradient_mode
16 | window_maximum_y 56357.000000
17 | window_minimum_y 23406.000000
18 | window_scale_relative 0.108983
19 | window_object appl { 1, { All } }
20 | window_begin_time_relative 0.000000000000
21 | window_pos_to_disp 598
22 | window_pos_of_x_scale 18
23 | window_pos_of_y_scale 80
24 | window_number_of_row 32
25 | window_click_options 0 0 1 0 0 0
26 | window_click_info 0 4628104571 5064718209 20 4846411390
27 | window_expanded false
28 | window_open false
29 | window_drawmode 0
30 | window_drawmode_rows 0
31 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
32 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
33 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
34 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } }
35 | window_analyzer_executed 0
36 | window_analyzer_info 0.000000 0.000000 0 0
37 | window_filter_module evt_type 1 11056
38 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/mx_counters/route_dispersion.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW Route dispersion (Port 0) >
6 | ################################################################################
7 | window_name Route dispersion (Port 0)
8 | window_type single
9 | window_id 1
10 | window_position_x 393
11 | window_position_y 304
12 | window_width 600
13 | window_height 114
14 | window_comm_lines_enabled false
15 | window_color_mode window_in_null_gradient_mode
16 | window_maximum_y 56357.000000
17 | window_minimum_y 23406.000000
18 | window_scale_relative 1.019876
19 | window_object appl { 1, { All } }
20 | window_begin_time_relative 0.000000000000
21 | window_pos_to_disp 584
22 | window_pos_of_x_scale 18
23 | window_pos_of_y_scale 80
24 | window_number_of_row 32
25 | window_click_options 0 0 1 0 0 0
26 | window_click_info 1 4628104571 5064718209 20 4846411390
27 | window_expanded false
28 | window_open false
29 | window_drawmode 0
30 | window_drawmode_rows 0
31 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
32 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
33 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
34 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } }
35 | window_analyzer_executed 0
36 | window_analyzer_info 0.000000 0.000000 0 0
37 | window_filter_module evt_type 1 11076
38 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/performance/MFLOPS.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW MFLOPs >
6 | ################################################################################
7 | window_name MFLOPs
8 | window_type single
9 | window_id 1
10 | window_position_x 393
11 | window_position_y 280
12 | window_width 600
13 | window_height 114
14 | window_comm_lines_enabled false
15 | window_color_mode window_in_null_gradient_mode
16 | window_maximum_y 714.715481
17 | window_minimum_y 0.011034
18 | window_scale_relative 1.000000
19 | window_object appl { 1, { All } }
20 | window_begin_time_relative 0.000000000000
21 | window_pos_to_disp 598
22 | window_pos_of_x_scale 18
23 | window_pos_of_y_scale 80
24 | window_number_of_row 32
25 | window_click_options 1 0 1 1 1 0
26 | window_click_info 1 154474143090 154504805486 1 154489474288
27 | window_expanded false
28 | window_open false
29 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Avg Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
30 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
31 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } }
32 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } }
33 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
34 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } }
35 | window_semantic_module topcompose2 Prod { 1, { 1 1.000000 } }
36 | window_analyzer_executed 0
37 | window_analyzer_info 0.000000 0.000000 0 0
38 | window_filter_module evt_type 1 42000052
39 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/performance/MFMAS.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW FMAs per microsecond >
6 | ################################################################################
7 | window_name FMAs per microsecond
8 | window_type single
9 | window_id 1
10 | window_position_x 369
11 | window_position_y 201
12 | window_width 600
13 | window_height 114
14 | window_color_mode window_in_null_gradient_mode
15 | window_maximum_y 2893.535012
16 | window_minimum_y 0.003315
17 | window_scale_relative 0.994231
18 | window_object appl { 1, { All } }
19 | window_begin_time_relative 0.000000000000
20 | window_pos_to_disp 598
21 | window_pos_of_x_scale 18
22 | window_pos_of_y_scale 80
23 | window_number_of_row 28
24 | window_click_options 0 0 1 0 0 0
25 | window_click_info 1 19640173623 20816231923 7 20228202773
26 | window_expanded false
27 | window_open false
28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Avg Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
29 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
30 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } }
31 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } }
32 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
33 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } }
34 | window_semantic_module topcompose2 Prod { 1, { 1 1.000000 } }
35 | window_analyzer_executed 0
36 | window_analyzer_info 0.000000 0.000000 0 0
37 | window_filter_module evt_type 1 42000048
38 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/performance/MLoadS.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW Loads per microsecond >
6 | ################################################################################
7 | window_name Loads per microsecond
8 | window_type single
9 | window_id 1
10 | window_position_x 131
11 | window_position_y 189
12 | window_width 600
13 | window_height 114
14 | window_color_mode window_in_null_gradient_mode
15 | window_maximum_y 2893.535012
16 | window_minimum_y 0.003315
17 | window_scale_relative 0.994231
18 | window_object appl { 1, { All } }
19 | window_begin_time_relative 0.000000000000
20 | window_pos_to_disp 599
21 | window_pos_of_x_scale 18
22 | window_pos_of_y_scale 80
23 | window_number_of_row 28
24 | window_click_options 0 0 1 0 0 0
25 | window_click_info 1 28040848529 29180720419 24 28610784474
26 | window_expanded false
27 | window_open false
28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Avg Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
29 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
30 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } }
31 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } }
32 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
33 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } }
34 | window_semantic_module topcompose2 Prod { 1, { 1 1.000000 } }
35 | window_analyzer_executed 0
36 | window_analyzer_info 0.000000 0.000000 0 0
37 | window_filter_module evt_type 1 42000053
38 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/performance/NoIssue_cycles_per_us.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW No issue cycles per microsecond >
6 | ################################################################################
7 | window_name No issue cycles per microsecond
8 | window_type single
9 | window_id 1
10 | window_position_x 384
11 | window_position_y 103
12 | window_width 600
13 | window_height 114
14 | window_color_mode window_in_null_gradient_mode
15 | window_maximum_y 5.584969
16 | window_minimum_y 0.003315
17 | window_scale_relative 1.000000
18 | window_object appl { 1, { All } }
19 | window_begin_time_relative 0.000000000000
20 | window_pos_to_disp 597
21 | window_pos_of_x_scale 18
22 | window_pos_of_y_scale 80
23 | window_number_of_row 28
24 | window_click_options 1 0 1 1 1 0
25 | window_click_info 1 3829117335 3945427411 8 3887272373
26 | window_expanded false
27 | window_open false
28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Avg Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
29 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
30 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } }
31 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } }
32 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
33 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } }
34 | window_semantic_module topcompose2 Prod { 1, { 1 1.000000 } }
35 | window_analyzer_executed 1
36 | window_analyzer_info 0.000000 285780830000.000000 1 74
37 | window_filter_module evt_type 1 42000037
38 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/performance/cycles_per_us.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW cycles per us >
6 | ################################################################################
7 | window_name cycles per us
8 | window_type single
9 | window_id 1
10 | window_position_x 312
11 | window_position_y 336
12 | window_width 600
13 | window_height 114
14 | window_color_mode window_in_null_gradient_mode
15 | window_maximum_y 2400.000000
16 | window_scale_relative 1.000000
17 | window_object appl { 1, { All } }
18 | window_begin_time_relative 0.000000000000
19 | window_pos_to_disp 598
20 | window_pos_of_x_scale 18
21 | window_pos_of_y_scale 80
22 | window_number_of_row 64
23 | window_click_options 1 0 1 1 1 0
24 | window_click_info 0 719698 719868 2 719698
25 | window_expanded false
26 | window_open false
27 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Avg Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
28 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
29 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } }
30 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } }
31 | window_compose_functions { 2, { {compose1, As Is}, {compose2, As Is} } }
32 | window_semantic_module compose1 Prod { 1, { 1 1.000000 } }
33 | window_semantic_module compose2 Prod { 1, { 1 1.000000 } }
34 | window_analyzer_executed 0
35 | window_analyzer_info 0.000000 0.000000 0 0
36 | window_filter_module evt_type 1 42000059
37 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/program/Load_stores.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW Load/stores >
6 | ################################################################################
7 | window_name Load/stores
8 | window_type single
9 | window_id 1
10 | window_position_x 349
11 | window_position_y 262
12 | window_width 600
13 | window_height 114
14 | window_color_mode window_in_null_gradient_mode
15 | window_maximum_y 85729243176.000000
16 | window_scale_relative 1.081834
17 | window_object appl { 1, { All } }
18 | window_begin_time_relative 0.000000000000
19 | window_pos_to_disp 558
20 | window_pos_of_x_scale 18
21 | window_pos_of_y_scale 80
22 | window_number_of_row 64
23 | window_click_options 1 0 1 1 1 0
24 | window_click_info 0 719698 719868 2 719698
25 | window_expanded false
26 | window_open false
27 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
28 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
29 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
30 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } }
31 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } }
32 | window_compose_functions { 2, { {compose1, As Is}, {compose2, As Is} } }
33 | window_semantic_module compose1 Prod { 1, { 1 1.000000 } }
34 | window_semantic_module compose2 Prod { 1, { 1 1.000000 } }
35 | window_analyzer_executed 0
36 | window_analyzer_info 0.000000 0.000000 0 0
37 | window_filter_module evt_type 1 42000060
38 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/program/Loads.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW Loads >
6 | ################################################################################
7 | window_name Loads
8 | window_type single
9 | window_id 1
10 | window_position_x 354
11 | window_position_y 284
12 | window_width 600
13 | window_height 114
14 | window_comm_lines_enabled false
15 | window_color_mode window_in_null_gradient_mode
16 | window_maximum_y 2551665770.000000
17 | window_scale_relative 1.109738
18 | window_object appl { 1, { All } }
19 | window_begin_time_relative 0.000000000000
20 | window_pos_to_disp 546
21 | window_pos_of_x_scale 18
22 | window_pos_of_y_scale 80
23 | window_number_of_row 32
24 | window_click_options 1 0 1 1 1 0
25 | window_click_info 0 719698 719868 2 719698
26 | window_expanded false
27 | window_open false
28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
29 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
30 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
31 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } }
32 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } }
33 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } }
34 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } }
35 | window_semantic_module topcompose Prod { 1, { 1 1.000000 } }
36 | window_analyzer_executed 0
37 | window_analyzer_info 0.000000 0.000000 0 0
38 | window_filter_module evt_type 1 42000053
39 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/program/Stores.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW Stores >
6 | ################################################################################
7 | window_name Stores
8 | window_type single
9 | window_id 1
10 | window_position_x 354
11 | window_position_y 284
12 | window_width 600
13 | window_height 114
14 | window_comm_lines_enabled false
15 | window_color_mode window_in_null_gradient_mode
16 | window_maximum_y 2551665770.000000
17 | window_scale_relative 1.109738
18 | window_object appl { 1, { All } }
19 | window_begin_time_relative 0.000000000000
20 | window_pos_to_disp 546
21 | window_pos_of_x_scale 18
22 | window_pos_of_y_scale 80
23 | window_number_of_row 32
24 | window_click_options 1 0 1 1 1 0
25 | window_click_info 0 719698 719868 2 719698
26 | window_expanded false
27 | window_open false
28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
29 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
30 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
31 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } }
32 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } }
33 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } }
34 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } }
35 | window_semantic_module topcompose Prod { 1, { 1 1.000000 } }
36 | window_analyzer_executed 0
37 | window_analyzer_info 0.000000 0.000000 0 0
38 | window_filter_module evt_type 1 42000054
39 |
--------------------------------------------------------------------------------
/paraver-cfgs/hwc/papi/program/instructions.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW Instructions >
6 | ################################################################################
7 | window_name Instructions
8 | window_type single
9 | window_id 1
10 | window_position_x 241
11 | window_position_y 462
12 | window_width 600
13 | window_height 114
14 | window_comm_lines_enabled false
15 | window_color_mode window_in_null_gradient_mode
16 | window_maximum_y 451758142613.000000
17 | window_scale_relative 1.117997
18 | window_object appl { 1, { All } }
19 | window_begin_time_relative 0.000000000000
20 | window_pos_to_disp 543
21 | window_pos_of_x_scale 18
22 | window_pos_of_y_scale 80
23 | window_number_of_row 64
24 | window_click_options 1 0 1 1 1 0
25 | window_click_info 0 719698 719868 2 719698
26 | window_expanded false
27 | window_open false
28 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Next Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
29 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
30 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
31 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } }
32 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } }
33 | window_compose_functions { 2, { {compose1, As Is}, {compose2, As Is} } }
34 | window_semantic_module compose1 Prod { 1, { 1 1.000000 } }
35 | window_semantic_module compose2 Prod { 1, { 1 1.000000 } }
36 | window_analyzer_executed 0
37 | window_analyzer_info 0.000000 0.000000 0 0
38 | window_filter_module evt_type 1 42000050
39 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/sanity_checks/backward_msgs.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 | begin_description
4 | Number of messages coming to a node from the future
5 |
6 | end_description
7 |
8 | ################################################################################
9 | < NEW DISPLAYING WINDOW Incoming backward msgs >
10 | ################################################################################
11 | window_name Incoming backward msgs
12 | window_type single
13 | window_id 1
14 | window_position_x 232
15 | window_position_y 98
16 | window_width 600
17 | window_height 671
18 | window_maximum_y 70.000000
19 | window_scale_relative 0.988383
20 | window_object appl { 1, { All } }
21 | window_begin_time_relative 0.000000000000
22 | window_pos_to_disp 598
23 | window_pos_of_x_scale 18
24 | window_pos_of_y_scale 85
25 | window_number_of_row 128
26 | window_click_options 1 0 1 0 0 0
27 | window_click_info 1 930017142 931047722 46 930532432
28 | window_expanded false
29 | window_open false
30 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Recv Negative Messages}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, <}, {evt_type, =}, {evt_value, All} } }
31 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
32 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
33 | window_semantic_module thread Send BandWidth { 1, { 1 1024.000000 } }
34 | window_semantic_module thread Recv BandWidth { 1, { 1 1024.000000 } }
35 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
36 | window_analyzer_executed 1
37 | window_analyzer_info 0.000000 10705615358.000000 1 96
38 | window_filter_module bw_msg 1 0.000000
39 | window_filter_module evt_type 1 50000001
40 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/views/MPI_call.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 | begin_description
4 | Color identifies MPI call
5 | Light blue: outside MPI
6 |
7 |
8 |
9 |
10 |
11 | end_description
12 |
13 | ################################################################################
14 | < NEW DISPLAYING WINDOW MPI call >
15 | ################################################################################
16 | window_name MPI call
17 | window_type single
18 | window_id 1
19 | window_position_x 336
20 | window_position_y 153
21 | window_width 600
22 | window_height 114
23 | window_comm_lines_enabled false
24 | window_maximum_y 115.000000
25 | window_minimum_y 2.000000
26 | window_scale_relative 1.028112
27 | window_object appl { 1, { All } }
28 | window_begin_time_relative 0.000000000000
29 | window_pos_to_disp 580
30 | window_pos_of_x_scale 18
31 | window_pos_of_y_scale 115
32 | window_number_of_row 16
33 | window_click_options 1 0 1 0 0 0
34 | window_click_info 1 32283334120 32309105806 0 32296219963
35 | window_expanded false
36 | window_open false
37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, [x,y]}, {evt_value, All} } }
38 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
39 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
40 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
41 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } }
42 | window_semantic_module topcompose2 Prod { 1, { 1 1.000000 } }
43 | window_analyzer_executed 0
44 | window_analyzer_info 0.000000 0.000000 0 0
45 | window_filter_module evt_type 2 50000001 50000003
46 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/views/advanced/total_bytes_in_transit.cfg:
--------------------------------------------------------------------------------
1 | version 3.3
2 | number_of_windows 1
3 | begin_description
4 | Total number of bytes in transit.
5 | end_description
6 |
7 | ################################################################################
8 | < NEW DISPLAYING WINDOW total bytes in transit >
9 | ################################################################################
10 | window_name total bytes in transit
11 | window_type single
12 | window_id 1
13 | window_position_x 404
14 | window_position_y 517
15 | window_width 600
16 | window_height 140
17 | window_comm_lines_enabled false
18 | window_noncolor_mode false
19 | window_color_mode window_in_null_gradient_mode
20 | window_maximum_y 2723208.000000
21 | window_level appl
22 | window_scale_relative 0.117784
23 | window_object appl { 1, { 1 } }
24 | window_begin_time_relative 0.000000000000
25 | window_pos_to_disp 598
26 | window_pos_of_x_scale 18
27 | window_pos_of_y_scale 85
28 | window_number_of_row 1
29 | window_click_options 1 0 1 0 0 0
30 | window_click_info 1 13819603 14039945 0 13929774
31 | window_expanded false
32 | window_open false
33 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Recv Bytes in Transit}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } }
34 | window_compose_functions { 2, { {compose1, As Is}, {compose2, As Is} } }
35 | window_analyzer_executed 0
36 | window_analyzer_info 0.000000 0.000000 0 0
37 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/views/collectives/MPI_collective_call.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 | begin_description
4 | Color identifies MPI collective call
5 | Light blue: outside MPI collective
6 |
7 |
8 |
9 |
10 |
11 | end_description
12 |
13 | ################################################################################
14 | < NEW DISPLAYING WINDOW MPI collective call >
15 | ################################################################################
16 | window_name MPI collective call
17 | window_type single
18 | window_id 1
19 | window_position_x 262
20 | window_position_y 73
21 | window_width 600
22 | window_height 114
23 | window_comm_lines_enabled false
24 | window_maximum_y 70.000000
25 | window_scale_relative 1.000000
26 | window_object appl { 1, { All } }
27 | window_begin_time_relative 0.000000000000
28 | window_pos_to_disp 597
29 | window_pos_of_x_scale 18
30 | window_pos_of_y_scale 115
31 | window_number_of_row 32
32 | window_click_options 1 0 1 0 0 0
33 | window_click_info 1 32283334120 32309105806 0 32296219963
34 | window_expanded false
35 | window_open false
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
38 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
39 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
40 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } }
41 | window_analyzer_executed 0
42 | window_analyzer_info 0.000000 0.000000 0 0
43 | window_filter_module evt_type 1 50000002
44 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/views/collectives/collective_root.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Collective root >
7 | ################################################################################
8 | window_name Collective root
9 | window_type single
10 | window_id 1
11 | window_position_x 444
12 | window_position_y 481
13 | window_width 600
14 | window_height 114
15 | window_comm_lines_enabled false
16 | window_flags_enabled true
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 91.000000000000
25 | window_minimum_y 0.000000000000
26 | window_compute_y_max false
27 | window_level thread
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 | window_semantic_module task Thread i { 1, { 1 0.000000000000 } }
39 | window_filter_module evt_type 1 50100003
40 |
41 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/views/collectives/outside_collective.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW Outside MPI collectives >
6 | ################################################################################
7 | window_name Outside MPI collectives
8 | window_type single
9 | window_id 1
10 | window_position_x 310
11 | window_position_y 203
12 | window_width 600
13 | window_height 134
14 | window_comm_lines_enabled false
15 | window_maximum_y 70.000000
16 | window_scale_relative 1.000000
17 | window_object appl { 1, { All } }
18 | window_begin_time_relative 0.000000000000
19 | window_pos_to_disp 597
20 | window_pos_of_x_scale 18
21 | window_pos_of_y_scale 120
22 | window_number_of_row 128
23 | window_click_options 1 0 1 0 0 0
24 | window_click_info 0 32283334120 32309105806 0 32296219963
25 | window_expanded false
26 | window_open false
27 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
28 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
29 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
30 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, 1-Sign}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
31 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } }
32 | window_semantic_module topcompose2 Prod { 1, { 1 1.000000 } }
33 | window_analyzer_executed 0
34 | window_analyzer_info 0.000000 0.000000 0 0
35 | window_filter_module evt_type 1 50000002
36 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/views/point2point/In_MPI_send_pt2pt_call.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW In MPI send p2p call >
7 | ################################################################################
8 | window_name In MPI send p2p call
9 | window_type single
10 | window_id 1
11 | window_position_x 420
12 | window_position_y 205
13 | window_width 600
14 | window_height 147
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 70.000000000000
25 | window_minimum_y 0.000000000000
26 | window_compute_y_max false
27 | window_level thread
28 | window_scale_relative 1.000001000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 0
34 | window_drawmode_rows 0
35 | window_pixel_size 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Is Equal (Sign)}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 | window_semantic_module task Thread i { 1, { 1 0.000000000000 } }
39 | window_semantic_module compose_thread Is Equal (Sign) { 1, { 5 1.000000000000 3.000000000000 34.000000000000 41.000000000000 42.000000000000 } }
40 | window_filter_module evt_type 1 50000001
41 |
42 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/views/point2point/p2p_bytes_sent.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW p2p bytes sent >
7 | ################################################################################
8 | window_name p2p bytes sent
9 | window_type single
10 | window_id 1
11 | window_position_x 590
12 | window_position_y 596
13 | window_width 592
14 | window_height 302
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_color_mode window_in_null_gradient_mode
19 | window_logical_filtered true
20 | window_physical_filtered false
21 | window_comm_fromto true
22 | window_comm_tagsize true
23 | window_comm_typeval true
24 | window_units Microseconds
25 | window_maximum_y 13421772.000000
26 | window_minimum_y 0.000000
27 | window_compute_y_max false
28 | window_level thread
29 | window_scale_relative 1.000000
30 | window_end_time_relative 1.000000000000
31 | window_object appl { 1, { All } }
32 | window_begin_time_relative 0.000000000000
33 | window_open false
34 | window_drawmode 1
35 | window_drawmode_rows 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Event Sent Bytes}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 | window_semantic_module task Thread i { 1, { 1 0.000000 } }
39 | window_filter_module evt_type 1 50000001
40 |
41 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/views/point2point/p2p_send_size.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Bytes sent btw events >
7 | ################################################################################
8 | window_name Bytes sent btw events
9 | window_type single
10 | window_id 1
11 | window_position_x 392
12 | window_position_y 419
13 | window_width 600
14 | window_height 244
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_color_mode window_in_null_gradient_mode
19 | window_logical_filtered true
20 | window_physical_filtered false
21 | window_comm_fromto true
22 | window_comm_tagsize true
23 | window_comm_typeval true
24 | window_units Microseconds
25 | window_maximum_y 4181568.000000
26 | window_minimum_y 0.000000
27 | window_compute_y_max false
28 | window_level thread
29 | window_scale_relative 1.000000
30 | window_end_time_relative 1.000000000000
31 | window_object appl { 1, { All } }
32 | window_begin_time_relative 0.000000000000
33 | window_open false
34 | window_drawmode 1
35 | window_drawmode_rows 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Event Sent Bytes}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 | window_semantic_module task Thread i { 1, { 1 0.000000 } }
39 | window_filter_module evt_type 1 50000001
40 |
41 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/views/point2point/p2p_size.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 | begin_description
4 | Bytes sent/received within point to point calls
5 | end_description
6 |
7 | ################################################################################
8 | < NEW DISPLAYING WINDOW Bytes btw events >
9 | ################################################################################
10 | window_name Bytes btw events
11 | window_type single
12 | window_id 1
13 | window_position_x 371
14 | window_position_y 449
15 | window_width 600
16 | window_height 114
17 | window_comm_lines_enabled false
18 | window_color_mode window_in_null_gradient_mode
19 | window_compute_y_max
20 | window_scale_relative 1.000000
21 | window_object appl { 1, { All } }
22 | window_begin_time_relative 0.000000000000
23 | window_pos_to_disp 598
24 | window_pos_of_x_scale 18
25 | window_pos_of_y_scale 80
26 | window_number_of_row 32
27 | window_click_options 1 0 1 0 0 0
28 | window_click_info 1 7938040900 8210825810 26 8074433355
29 | window_expanded false
30 | window_open false
31 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Event Bytes}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
32 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
33 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
34 | window_compose_functions { 8, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose, As Is} } }
35 | window_analyzer_executed 0
36 | window_analyzer_info 0.000000 0.000000 0 0
37 | window_filter_module evt_type 1 50000001
38 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/views/point2point/total_bw.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Total Send bandwidth (MB/s) >
7 | ################################################################################
8 | window_name Total Send bandwidth (MB/s)
9 | window_type single
10 | window_id 1
11 | window_position_x 596
12 | window_position_y 190
13 | window_width 600
14 | window_height 143
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode false
18 | window_color_mode window_in_null_gradient_mode
19 | window_logical_filtered true
20 | window_physical_filtered false
21 | window_comm_fromto true
22 | window_comm_tagsize true
23 | window_comm_typeval true
24 | window_units Microseconds
25 | window_maximum_y 10.400097390318
26 | window_minimum_y 0.000000000000
27 | window_compute_y_max false
28 | window_level appl
29 | window_scale_relative 1.000000000000
30 | window_end_time_relative 1.000000000000
31 | window_object appl { 1, { 1 } }
32 | window_begin_time_relative 0.000000000000
33 | window_open true
34 | window_drawmode 1
35 | window_drawmode_rows 1
36 | window_pixel_size 1
37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Send BandWidth}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } }
38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
39 | window_semantic_module thread Send BandWidth { 1, { 1 1.000000000000 } }
40 |
41 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/views/point2point/total_bytes_btw_events.cfg:
--------------------------------------------------------------------------------
1 | version 3.4
2 | number_of_windows 1
3 |
4 | ################################################################################
5 | < NEW DISPLAYING WINDOW Total bytes btw events >
6 | ################################################################################
7 | window_name Total bytes btw events
8 | window_type single
9 | window_id 1
10 | window_position_x 529
11 | window_position_y 374
12 | window_width 600
13 | window_height 302
14 | window_comm_lines_enabled false
15 | window_noncolor_mode false
16 | window_color_mode window_in_null_gradient_mode
17 | window_maximum_y 441440.000000
18 | window_minimum_y 320.000000
19 | window_level appl
20 | window_scale_relative 1.000000
21 | window_object appl { 1, { 1 } }
22 | window_begin_time_relative 0.000000000000
23 | window_pos_to_disp 597
24 | window_pos_of_x_scale 18
25 | window_pos_of_y_scale 115
26 | window_number_of_row 1
27 | window_click_options 1 0 1 1 0 0
28 | window_click_info 1 31861509 31863359 0 31862434
29 | window_expanded false
30 | window_open false
31 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Thread i}, {thread, Event Bytes}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
32 | window_semantic_module thread Avg Next Evt Val { 1, { 1 1000.000000 } }
33 | window_semantic_module thread Avg Last Evt Val { 1, { 1 1000.000000 } }
34 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
35 | window_semantic_module compose_thread Prod { 1, { 1 1.000000 } }
36 | window_analyzer_executed 0
37 | window_analyzer_info 0.000000 0.000000 0 0
38 | window_filter_module evt_type 1 50000001
39 |
--------------------------------------------------------------------------------
/paraver-cfgs/mpi/views/point2point/total_msgs_in_transit.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Total msgs in transit >
7 | ################################################################################
8 | window_name Total msgs in transit
9 | window_type single
10 | window_id 1
11 | window_position_x 406
12 | window_position_y 60
13 | window_width 600
14 | window_height 143
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode false
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 20.000000000000
25 | window_minimum_y 1.000000000000
26 | window_compute_y_max false
27 | window_level appl
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { 1 } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Send Messages in Transit}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, All}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 |
39 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/cuda/CUDA_runtime.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW CUDA Runtime >
7 | ################################################################################
8 | window_name CUDA Runtime
9 | window_type single
10 | window_id 1
11 | window_position_x 487
12 | window_position_y 225
13 | window_width 641
14 | window_height 115
15 | window_comm_lines_enabled true
16 | window_flags_enabled true
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 30.000000000000
25 | window_minimum_y 1.000000000000
26 | window_compute_y_max false
27 | window_level thread
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open false
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, None}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 | window_filter_module tag_msg 1 1
39 | window_filter_module evt_type 1 9200027
40 |
41 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/data_mgmgt/cache_waiting_for.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Cache waiting for something >
7 | ################################################################################
8 | window_name Cache waiting for something
9 | window_type single
10 | window_id 1
11 | window_position_x 584
12 | window_position_y 165
13 | window_width 641
14 | window_height 115
15 | window_comm_lines_enabled false
16 | window_flags_enabled true
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 30.000000000000
25 | window_minimum_y 0.000000000000
26 | window_compute_y_max false
27 | window_level thread
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 | window_filter_module evt_type 1 9200029
39 |
40 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/data_mgmgt/data_tx.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 | Data transfers between host and devices
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Data Transfers >
7 | ################################################################################
8 | window_name Data Transfers
9 | window_type single
10 | window_id 1
11 | window_position_x 2317
12 | window_position_y 254
13 | window_width 641
14 | window_height 115
15 | window_comm_lines_enabled true
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 4.000000000000
25 | window_minimum_y 1.000000000000
26 | window_compute_y_max false
27 | window_level thread
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, None}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 | window_filter_module tag_msg 1 1
39 | window_filter_module evt_type 1 9200060
40 |
41 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/graph_and_scheduling/creating_submitting_task.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Creating/submitting task >
7 | ################################################################################
8 | window_name Creating/submitting task
9 | window_type single
10 | window_id 1
11 | window_position_x 650
12 | window_position_y 269
13 | window_width 641
14 | window_height 115
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 27.000000000000
25 | window_minimum_y 2.000000000000
26 | window_compute_y_max false
27 | window_level thread
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open false
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, !=}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, Select Range} } }
38 | window_semantic_module topcompose2 Select Range { 2, { 1 18.000000000000, 1 17.000000000000 } }
39 | window_filter_module tag_msg 1 1
40 | window_filter_module evt_type 1 9200001
41 |
42 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/graph_and_scheduling/nb_concurrent_ready.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW # concurrent tasks in ready >
7 | ################################################################################
8 | window_name # concurrent tasks in ready
9 | window_type single
10 | window_id 1
11 | window_position_x 630
12 | window_position_y 277
13 | window_width 826
14 | window_height 398
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode false
18 | window_color_mode window_in_null_gradient_mode
19 | window_logical_filtered true
20 | window_physical_filtered false
21 | window_comm_fromto true
22 | window_comm_tagsize true
23 | window_comm_typeval true
24 | window_units Microseconds
25 | window_maximum_y 158.000000000000
26 | window_minimum_y 1.000000000000
27 | window_compute_y_max true
28 | window_level task
29 | window_scale_relative 1.000000000000
30 | window_end_time_relative 1.000000000000
31 | window_object appl { 1, { All } }
32 | window_begin_time_relative 0.000000000000
33 | window_open true
34 | window_drawmode 1
35 | window_drawmode_rows 1
36 | window_pixel_size 1
37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Changed value}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
39 | window_filter_module evt_type 1 9200050
40 |
41 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/graph_and_scheduling/nb_ready_tasks.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW # ready tasks in queue >
7 | ################################################################################
8 | window_name # ready tasks in queue
9 | window_type single
10 | window_id 1
11 | window_position_x 630
12 | window_position_y 277
13 | window_width 826
14 | window_height 398
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode false
18 | window_color_mode window_in_null_gradient_mode
19 | window_logical_filtered true
20 | window_physical_filtered false
21 | window_comm_fromto true
22 | window_comm_tagsize true
23 | window_comm_typeval true
24 | window_units Microseconds
25 | window_maximum_y 158.000000000000
26 | window_minimum_y 1.000000000000
27 | window_compute_y_max true
28 | window_level task
29 | window_scale_relative 1.000000000000
30 | window_end_time_relative 1.000000000000
31 | window_object appl { 1, { All } }
32 | window_begin_time_relative 0.000000000000
33 | window_open true
34 | window_drawmode 1
35 | window_drawmode_rows 1
36 | window_pixel_size 1
37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Changed value}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
39 | window_filter_module evt_type 1 9200022
40 |
41 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/graph_and_scheduling/nb_tasks_in_graph.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Number of tasks in graph >
7 | ################################################################################
8 | window_name Number of tasks in graph
9 | window_type single
10 | window_id 1
11 | window_position_x 648
12 | window_position_y 118
13 | window_width 623
14 | window_height 111
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode false
18 | window_color_mode window_in_null_gradient_mode
19 | window_logical_filtered true
20 | window_physical_filtered false
21 | window_comm_fromto true
22 | window_comm_tagsize true
23 | window_comm_typeval true
24 | window_units Nanoseconds
25 | window_maximum_y 2714.000000000000
26 | window_minimum_y 1.000000000000
27 | window_compute_y_max false
28 | window_level task
29 | window_scale_relative 1.000000000000
30 | window_end_time_relative 1.000000000000
31 | window_object appl { 1, { All } }
32 | window_begin_time_relative 0.000000000000
33 | window_open true
34 | window_drawmode 1
35 | window_drawmode_rows 1
36 | window_pixel_size 1
37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Changed value}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
39 | window_filter_module evt_type 1 9200023
40 |
41 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/graph_and_scheduling/versioning_sched.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 | Versioning scheduler decisions
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Versioning Scheduler >
7 | ################################################################################
8 | window_name Versioning Scheduler
9 | window_type single
10 | window_id 1
11 | window_position_x 1916
12 | window_position_y 397
13 | window_width 958
14 | window_height 115
15 | window_comm_lines_enabled false
16 | window_flags_enabled true
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Nanoseconds
24 | window_maximum_y 0.000000000000
25 | window_minimum_y 0.000000000000
26 | window_compute_y_max true
27 | window_level thread
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, =} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 | window_filter_module evt_type 1 9200035
39 | window_filter_module evt_value 18 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
40 |
41 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/opencl/opencl_runtime.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 | Inside OpenCL runtime
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW OpenCL Runtime >
7 | ################################################################################
8 | window_name OpenCL Runtime
9 | window_type single
10 | window_id 1
11 | window_position_x 518
12 | window_position_y 238
13 | window_width 600
14 | window_height 115
15 | window_comm_lines_enabled true
16 | window_flags_enabled true
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 16.000000000000
25 | window_minimum_y 1.000000000000
26 | window_compute_y_max false
27 | window_level thread
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, None}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 | window_filter_module tag_msg 1 1
39 | window_filter_module evt_type 1 9200039
40 |
41 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/runtime/async_thread_state.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Async thread state >
7 | ################################################################################
8 | window_name Async thread state
9 | window_type single
10 | window_id 1
11 | window_position_x 1870
12 | window_position_y 297
13 | window_width 641
14 | window_height 115
15 | window_comm_lines_enabled true
16 | window_flags_enabled true
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 7.000000000000
25 | window_minimum_y 2.000000000000
26 | window_compute_y_max true
27 | window_level thread
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_labels_to_draw 1
37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, None}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, Stacked Val}, {topcompose2, As Is} } }
39 | window_filter_module tag_msg 1 1
40 | window_filter_module evt_type 1 9200052
41 |
42 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/runtime/nanos_API.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW NANOS API >
7 | ################################################################################
8 | window_name NANOS API
9 | window_type single
10 | window_id 1
11 | window_position_x 2081
12 | window_position_y 389
13 | window_width 641
14 | window_height 192
15 | window_comm_lines_enabled false
16 | window_flags_enabled true
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Nanoseconds
24 | window_maximum_y 43.000000000000
25 | window_minimum_y 0.000000000000
26 | window_compute_y_max false
27 | window_level thread
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 | window_filter_module evt_type 1 9200001
39 |
40 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/runtime/nanos_locks.cfg:
--------------------------------------------------------------------------------
1 | #ParaverCFG
2 | ConfigFile.Version: 3.4
3 | ConfigFile.NumWindows: 1
4 |
5 |
6 | ################################################################################
7 | < NEW DISPLAYING WINDOW Nanos lock >
8 | ################################################################################
9 | window_name Nanos lock
10 | window_type single
11 | window_id 1
12 | window_position_x 414
13 | window_position_y 521
14 | window_width 641
15 | window_height 115
16 | window_comm_lines_enabled false
17 | window_flags_enabled true
18 | window_noncolor_mode true
19 | window_logical_filtered true
20 | window_physical_filtered false
21 | window_comm_fromto true
22 | window_comm_tagsize true
23 | window_comm_typeval true
24 | window_units Microseconds
25 | window_maximum_y 1.000000000000
26 | window_minimum_y 1.000000000000
27 | window_compute_y_max false
28 | window_level thread
29 | window_scale_relative 1.000000000000
30 | window_end_time_relative 1.000000000000
31 | window_object appl { 1, { All } }
32 | window_begin_time_relative 0.000000000000
33 | window_open true
34 | window_drawmode 1
35 | window_drawmode_rows 1
36 | window_pixel_size 1
37 | window_labels_to_draw 1
38 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
39 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, Sign}, {topcompose2, As Is} } }
40 | window_filter_module tag_msg 1 1
41 | window_filter_module evt_type 1 9200017
42 |
43 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/runtime/num_threads.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Number of Threads >
7 | ################################################################################
8 | window_name Number of Threads
9 | window_type single
10 | window_id 1
11 | window_position_x 480
12 | window_position_y 343
13 | window_width 735
14 | window_height 248
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 24.000000000000
25 | window_minimum_y 0.000000000000
26 | window_compute_y_max false
27 | window_level task
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_labels_to_draw 1
37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Changed value}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
39 | window_filter_module tag_msg 1 1
40 | window_filter_module evt_type 1 9200041
41 |
42 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/runtime/thread_cpuid.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW Thread cpuid >
7 | ################################################################################
8 | window_name Thread cpuid
9 | window_type single
10 | window_id 1
11 | window_position_x 480
12 | window_position_y 638
13 | window_width 735
14 | window_height 248
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 24.000000000000
25 | window_minimum_y 0.000000000000
26 | window_compute_y_max false
27 | window_level thread
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_labels_to_draw 1
37 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
38 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
39 | window_filter_module tag_msg 1 1
40 | window_filter_module evt_type 1 9200042
41 |
42 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/runtime/thread_numa_node.cfg:
--------------------------------------------------------------------------------
1 | #ParaverCFG
2 | ConfigFile.Version: 3.4
3 | ConfigFile.NumWindows: 1
4 |
5 |
6 | ################################################################################
7 | < NEW DISPLAYING WINDOW Thread NUMA node >
8 | ################################################################################
9 | window_name Thread NUMA node
10 | window_type single
11 | window_id 1
12 | window_position_x 480
13 | window_position_y 638
14 | window_width 735
15 | window_height 248
16 | window_comm_lines_enabled false
17 | window_flags_enabled false
18 | window_noncolor_mode true
19 | window_logical_filtered true
20 | window_physical_filtered false
21 | window_comm_fromto true
22 | window_comm_tagsize true
23 | window_comm_typeval true
24 | window_units Microseconds
25 | window_maximum_y 24.000000000000
26 | window_minimum_y 0.000000000000
27 | window_compute_y_max false
28 | window_level thread
29 | window_scale_relative 1.000000000000
30 | window_end_time_relative 1.000000000000
31 | window_object appl { 1, { All } }
32 | window_begin_time_relative 0.000000000000
33 | window_open true
34 | window_drawmode 1
35 | window_drawmode_rows 1
36 | window_pixel_size 1
37 | window_labels_to_draw 1
38 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
39 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
40 | window_filter_module tag_msg 1 1
41 | window_filter_module evt_type 1 9200064
42 |
43 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/runtime/thread_state.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW thread state >
7 | ################################################################################
8 | window_name thread state
9 | window_type single
10 | window_id 1
11 | window_position_x 425
12 | window_position_y 35
13 | window_width 641
14 | window_height 115
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 15.000000000000
25 | window_minimum_y 0.000000000000
26 | window_compute_y_max false
27 | window_level thread
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, =}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, Stacked Val}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, As Is}, {topcompose2, As Is} } }
38 | window_filter_module tag_msg 1 1
39 | window_filter_module evt_type 1 9000000
40 |
41 |
--------------------------------------------------------------------------------
/paraver-cfgs/ompss/tasks/in_task.cfg:
--------------------------------------------------------------------------------
1 | ConfigFile.Version: 3.4
2 | ConfigFile.NumWindows: 1
3 |
4 |
5 | ################################################################################
6 | < NEW DISPLAYING WINDOW In task >
7 | ################################################################################
8 | window_name In task
9 | window_type single
10 | window_id 1
11 | window_position_x 501
12 | window_position_y 47
13 | window_width 641
14 | window_height 195
15 | window_comm_lines_enabled false
16 | window_flags_enabled false
17 | window_noncolor_mode true
18 | window_logical_filtered true
19 | window_physical_filtered false
20 | window_comm_fromto true
21 | window_comm_tagsize true
22 | window_comm_typeval true
23 | window_units Microseconds
24 | window_maximum_y 6.000000000000
25 | window_minimum_y 1.000000000000
26 | window_compute_y_max false
27 | window_level thread
28 | window_scale_relative 1.000000000000
29 | window_end_time_relative 1.000000000000
30 | window_object appl { 1, { All } }
31 | window_begin_time_relative 0.000000000000
32 | window_open true
33 | window_drawmode 1
34 | window_drawmode_rows 1
35 | window_pixel_size 1
36 | window_selected_functions { 14, { {cpu, Active Thd}, {appl, Adding}, {task, Adding}, {thread, Last Evt Val}, {node, Adding}, {system, Adding}, {workload, Adding}, {from_obj, All}, {to_obj, All}, {tag_msg, All}, {size_msg, All}, {bw_msg, All}, {evt_type, =}, {evt_value, All} } }
37 | window_compose_functions { 9, { {compose_cpu, As Is}, {compose_appl, As Is}, {compose_task, As Is}, {compose_thread, As Is}, {compose_node, As Is}, {compose_system, As Is}, {compose_workload, As Is}, {topcompose1, Sign}, {topcompose2, As Is} } }
38 | window_filter_module evt_type 1 9200011
39 |
40 |
--------------------------------------------------------------------------------