├── OpenMP ├── OpenMPNEON.cpp ├── OpenMPSSE.cpp └── OpenMPSpecialGauss.cpp ├── Pthread ├── PthreadNEON.cpp ├── PthreadSSE AVX.cpp └── PthreadSpecialGauss.cpp ├── SIMD ├── Guass.cpp ├── NEON.cpp └── SpecialGuass.cpp ├── cuda_learning ├── 0.cu ├── 1.cu ├── 3.cu └── 4.cu ├── final ├── big_scale_IO.cpp ├── big_scale_IO2.cpp ├── bitmap_store.cpp ├── sparse_store.cpp ├── wrong_pthread.cpp └── wrong_pthread_improved.cpp ├── homework1 ├── main1.cpp └── main2.cpp ├── mpi ├── mpi.cpp ├── mpi_improved.cpp ├── mpi_omp.cpp ├── mpi_omp_simd.cpp ├── mpi_pipeline.cpp └── mpi_specialGauss.cpp └── readme.md /OpenMP/OpenMPNEON.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoonLight0123/parallel_repository/4f3979fa2e1dca5a20350d4d2c01ecc85a7c6ce1/OpenMP/OpenMPNEON.cpp -------------------------------------------------------------------------------- /OpenMP/OpenMPSSE.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoonLight0123/parallel_repository/4f3979fa2e1dca5a20350d4d2c01ecc85a7c6ce1/OpenMP/OpenMPSSE.cpp -------------------------------------------------------------------------------- /OpenMP/OpenMPSpecialGauss.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoonLight0123/parallel_repository/4f3979fa2e1dca5a20350d4d2c01ecc85a7c6ce1/OpenMP/OpenMPSpecialGauss.cpp -------------------------------------------------------------------------------- /Pthread/PthreadNEON.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoonLight0123/parallel_repository/4f3979fa2e1dca5a20350d4d2c01ecc85a7c6ce1/Pthread/PthreadNEON.cpp -------------------------------------------------------------------------------- /Pthread/PthreadSSE AVX.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoonLight0123/parallel_repository/4f3979fa2e1dca5a20350d4d2c01ecc85a7c6ce1/Pthread/PthreadSSE AVX.cpp -------------------------------------------------------------------------------- /Pthread/PthreadSpecialGauss.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoonLight0123/parallel_repository/4f3979fa2e1dca5a20350d4d2c01ecc85a7c6ce1/Pthread/PthreadSpecialGauss.cpp -------------------------------------------------------------------------------- /SIMD/Guass.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoonLight0123/parallel_repository/4f3979fa2e1dca5a20350d4d2c01ecc85a7c6ce1/SIMD/Guass.cpp -------------------------------------------------------------------------------- /SIMD/NEON.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | using namespace std; 5 | const int N=1024; 6 | float a[N][N]; 7 | void init() 8 | { 9 | for(int i=0;itv_sec*1000+double(stop->tv_usec)/1000-start->tv_sec*1000-double(start->tv_usec)/1000; 141 | cout << " SequentialAlgorithm time: " << double(durationTime) << " ms" << endl; 142 | 143 | init(); 144 | gettimeofday(start,NULL); 145 | ParallelAlgorithm(); 146 | gettimeofday(stop,NULL); 147 | durationTime =stop->tv_sec*1000+double(stop->tv_usec)/1000-start->tv_sec*1000-double(start->tv_usec)/1000; 148 | cout << " ParallelAlgorithm time: " << double(durationTime) << " ms" << endl; 149 | 150 | init(); 151 | gettimeofday(start,NULL); 152 | AlignedParallelAlgorithm(); 153 | gettimeofday(stop,NULL); 154 | durationTime =stop->tv_sec*1000+double(stop->tv_usec)/1000-start->tv_sec*1000-double(start->tv_usec)/1000; 155 | cout << " AlignedParallelAlgorithm time: " << double(durationTime) << " ms" << endl; 156 | 157 | return 0; 158 | } 159 | -------------------------------------------------------------------------------- /SIMD/SpecialGuass.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MoonLight0123/parallel_repository/4f3979fa2e1dca5a20350d4d2c01ecc85a7c6ce1/SIMD/SpecialGuass.cpp -------------------------------------------------------------------------------- /cuda_learning/0.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | * Initialize array values on the host. 5 | */ 6 | 7 | void init(int *a, int N) 8 | { 9 | int i; 10 | for (i = 0; i < N; ++i) 11 | { 12 | a[i] = i; 13 | } 14 | } 15 | 16 | /* 17 | * Double elements in parallel on the GPU. 18 | */ 19 | 20 | __global__ 21 | void doubleElements(int *a, int N) 22 | { 23 | int i; 24 | i = blockIdx.x * blockDim.x + threadIdx.x; 25 | if (i < N) 26 | { 27 | a[i] *= 2; 28 | } 29 | } 30 | 31 | /* 32 | * Check all elements have been doubled on the host. 33 | */ 34 | 35 | bool checkElementsAreDoubled(int *a, int N) 36 | { 37 | int i; 38 | for (i = 0; i < N; ++i) 39 | { 40 | if (a[i] != i*2) return false; 41 | } 42 | return true; 43 | } 44 | 45 | int main() 46 | { 47 | int N = 100; 48 | int *a; 49 | 50 | size_t size = N * sizeof(int); 51 | 52 | /* 53 | * Refactor this memory allocation to provide a pointer 54 | * `a` that can be used on both the host and the device. 55 | */ 56 | 57 | //a = (int *)malloc(size); 58 | cudaMallocManaged(&a, size); 59 | init(a, N); 60 | 61 | size_t threads_per_block = 10; 62 | size_t number_of_blocks = 10; 63 | 64 | /* 65 | * This launch will not work until the pointer `a` is also 66 | * available to the device. 67 | */ 68 | 69 | doubleElements<<>>(a, N); 70 | cudaDeviceSynchronize(); 71 | 72 | bool areDoubled = checkElementsAreDoubled(a, N); 73 | printf("All elements were doubled? %s\n", areDoubled ? "TRUE" : "FALSE"); 74 | 75 | /* 76 | * Refactor to free memory that has been allocated to be 77 | * accessed by both the host and the device. 78 | */ 79 | 80 | cudaFree(a); 81 | } 82 | -------------------------------------------------------------------------------- /cuda_learning/1.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | * Refactor firstParallel so that it can run on the GPU. 5 | */ 6 | 7 | __global__ void firstParallel() 8 | { 9 | printf("This should be running in parallel.\n"); 10 | } 11 | 12 | int main() 13 | { 14 | /* 15 | * Refactor this call to firstParallel to execute in parallel 16 | * on the GPU. 17 | */ 18 | 19 | firstParallel<<<3,3>>>(); 20 | cudaDeviceSynchronize(); 21 | /* 22 | * Some code is needed below so that the CPU will wait 23 | * for the GPU kernels to complete before proceeding. 24 | */ 25 | 26 | } 27 | 28 | #include 29 | 30 | __global__ void printSuccessForCorrectExecutionConfiguration() 31 | { 32 | 33 | if(threadIdx.x == 1023 && blockIdx.x == 255) 34 | { 35 | printf("Success!\n"); 36 | } 37 | } 38 | 39 | int main() 40 | { 41 | /* 42 | * This is one possible execution context that will make 43 | * the kernel launch print its success message. 44 | */ 45 | 46 | printSuccessForCorrectExecutionConfiguration<<<256, 1024>>>(); 47 | 48 | /* 49 | * Don't forget kernel execution is asynchronous and you must 50 | * sync on its completion. 51 | */ 52 | 53 | cudaDeviceSynchronize(); 54 | } -------------------------------------------------------------------------------- /cuda_learning/3.cu: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | /* 4 | * Host function to initialize vector elements. This function 5 | * simply initializes each element to equal its index in the 6 | * vector. 7 | */ 8 | 9 | void initWith(float num, float *a, int N) 10 | { 11 | for(int i = 0; i < N; ++i) 12 | { 13 | a[i] = num; 14 | } 15 | } 16 | 17 | /* 18 | * Device kernel stores into `result` the sum of each 19 | * same-indexed value of `a` and `b`. 20 | */ 21 | 22 | __global__ 23 | void addVectorsInto(float *result, float *a, float *b, int N) 24 | { 25 | int index = threadIdx.x + blockIdx.x * blockDim.x; 26 | int stride = blockDim.x * gridDim.x; 27 | 28 | for(int i = index; i < N; i += stride) 29 | { 30 | result[i] = a[i] + b[i]; 31 | } 32 | } 33 | 34 | /* 35 | * Host function to confirm values in `vector`. This function 36 | * assumes all values are the same `target` value. 37 | */ 38 | 39 | void checkElementsAre(float target, float *vector, int N) 40 | { 41 | for(int i = 0; i < N; i++) 42 | { 43 | if(vector[i] != target) 44 | { 45 | printf("FAIL: vector[%d] - %0.0f does not equal %0.0f\n", i, vector[i], target); 46 | exit(1); 47 | } 48 | } 49 | printf("Success! All values calculated correctly.\n"); 50 | } 51 | 52 | int main() 53 | { 54 | const int N = 2<<24; 55 | size_t size = N * sizeof(float); 56 | 57 | float *a; 58 | float *b; 59 | float *c; 60 | 61 | cudaMallocManaged(&a, size); 62 | cudaMallocManaged(&b, size); 63 | cudaMallocManaged(&c, size); 64 | 65 | initWith(3, a, N); 66 | initWith(4, b, N); 67 | initWith(0, c, N); 68 | 69 | size_t threadsPerBlock; 70 | size_t numberOfBlocks; 71 | 72 | /* 73 | * nsys should register performance changes when execution configuration 74 | * is updated. 75 | */ 76 | 77 | threadsPerBlock = 1000; 78 | numberOfBlocks = 1024; 79 | 80 | cudaError_t addVectorsErr; 81 | cudaError_t asyncErr; 82 | 83 | addVectorsInto<<>>(c, a, b, N); 84 | 85 | addVectorsErr = cudaGetLastError(); 86 | if(addVectorsErr != cudaSuccess) printf("Error: %s\n", cudaGetErrorString(addVectorsErr)); 87 | 88 | asyncErr = cudaDeviceSynchronize(); 89 | if(asyncErr != cudaSuccess) printf("Error: %s\n", cudaGetErrorString(asyncErr)); 90 | 91 | checkElementsAre(7, c, N); 92 | 93 | cudaFree(a); 94 | cudaFree(b); 95 | cudaFree(c); 96 | } -------------------------------------------------------------------------------- /cuda_learning/4.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "timer.h" 5 | #include "files.h" 6 | 7 | #define SOFTENING 1e-9f 8 | 9 | /* 10 | * Each body contains x, y, and z coordinate positions, 11 | * as well as velocities in the x, y, and z directions. 12 | */ 13 | 14 | typedef struct { float x, y, z, vx, vy, vz; } Body; 15 | 16 | /* 17 | * Calculate the gravitational impact of all bodies in the system 18 | * on all others. 19 | */ 20 | 21 | __global__ 22 | void integratePosition(Body *p, float dt,int n) { 23 | int index = threadIdx.x + blockIdx.x * blockDim.x; 24 | int stride = blockDim.x * gridDim.x; 25 | for (int i = index; i < n; i += stride) { 26 | p[i].x += p[i].vx*dt; 27 | p[i].y += p[i].vy*dt; 28 | p[i].z += p[i].vz*dt; 29 | } 30 | } 31 | 32 | __global__ 33 | void bodyForce(Body *p, float dt, int n) { 34 | int index = threadIdx.x + blockIdx.x * blockDim.x; 35 | int stride = blockDim.x * gridDim.x; 36 | for (int i = index; i < n; i += stride) { 37 | float Fx = 0.0f; float Fy = 0.0f; float Fz = 0.0f; 38 | for (int j = 0; j < n; j++) { 39 | float dx = p[j].x - p[i].x; 40 | float dy = p[j].y - p[i].y; 41 | float dz = p[j].z - p[i].z; 42 | float distSqr = dx*dx + dy*dy + dz*dz + SOFTENING; 43 | float invDist = rsqrtf(distSqr); 44 | float invDist3 = invDist * invDist * invDist; 45 | Fx += dx * invDist3; Fy += dy * invDist3; Fz += dz * invDist3; 46 | } 47 | 48 | p[i].vx += dt*Fx; p[i].vy += dt*Fy; p[i].vz += dt*Fz; 49 | } 50 | } 51 | 52 | 53 | int main(const int argc, const char** argv) { 54 | 55 | // The assessment will test against both 2<11 and 2<15. 56 | // Feel free to pass the command line argument 15 when you generate ./nbody report files 57 | int nBodies = 2<<11; 58 | if (argc > 1) nBodies = 2< 2) initialized_values = argv[2]; 74 | if (argc > 3) solution_values = argv[3]; 75 | 76 | const float dt = 0.01f; // Time step 77 | const int nIters = 10; // Simulation iterations 78 | 79 | int deviceId; 80 | int numberofSMs; 81 | cudaGetDevice(&deviceId); 82 | cudaDeviceGetAttribute(&numberofSMs, cudaDevAttrMultiProcessorCount, deviceId); 83 | int bytes = nBodies * sizeof(Body); 84 | float *buf; 85 | cudaMallocManaged(&buf, bytes); 86 | 87 | Body *p = (Body*)buf; 88 | 89 | read_values_from_file(initialized_values, buf, bytes); 90 | 91 | double totalTime = 0.0; 92 | size_t threadsPerBlock; 93 | size_t numberOfBlocks; 94 | 95 | threadsPerBlock = 256; 96 | numberOfBlocks = 32 * numberofSMs; 97 | 98 | cudaMemPrefetchAsync(p, bytes, deviceId); 99 | 100 | /* 101 | * This simulation will run for 10 cycles of time, calculating gravitational 102 | * interaction amongst bodies, and adjusting their positions to reflect. 103 | */ 104 | 105 | for (int iter = 0; iter < nIters; iter++) { 106 | StartTimer(); 107 | 108 | /* 109 | * You will likely wish to refactor the work being done in `bodyForce`, 110 | * and potentially the work to integrate the positions. 111 | */ 112 | 113 | bodyForce<<>>(p, dt, nBodies); // compute interbody forces 114 | 115 | /* 116 | * This position integration cannot occur until this round of `bodyForce` has completed. 117 | * Also, the next round of `bodyForce` cannot begin until the integration is complete. 118 | */ 119 | 120 | integratePosition<<>>(p,dt, nBodies); 121 | cudaDeviceSynchronize(); 122 | const double tElapsed = GetTimer() / 1000.0; 123 | totalTime += tElapsed; 124 | } 125 | 126 | double avgTime = totalTime / (double)(nIters); 127 | float billionsOfOpsPerSecond = 1e-9 * nBodies * nBodies / avgTime; 128 | write_values_to_file(solution_values, buf, bytes); 129 | 130 | // You will likely enjoy watching this value grow as you accelerate the application, 131 | // but beware that a failure to correctly synchronize the device might result in 132 | // unrealistically high values. 133 | printf("%0.3f Billion Interactions / second\n", billionsOfOpsPerSecond); 134 | 135 | free(buf); 136 | } 137 | -------------------------------------------------------------------------------- /final/big_scale_IO.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #pragma comment(lib, "pthreadVC2.lib") 12 | using namespace std; 13 | const int maxN = 25000;//采用位图存储,消元子和消元行的个数之和 14 | const int maxM = 1000000;//矩阵列数 15 | const int inputERow = 1717; 16 | int eNum = 10000; 17 | const int rNum = 15000; 18 | int R[maxN][maxM / 32 + 1];//消元子 19 | int E[maxN][maxM / 32 + 1];//被消元行 20 | int rPos[maxM];//rPos[i]表示首项为i的消元行在R中第rPos行 21 | int ePos[maxN];//ePos[i]表示E第i行首项的位置 22 | int rNumNow, eNumNow, eNumOrigin; 23 | int gettimeofday(struct timeval *tp, void *tzp) 24 | { 25 | time_t clock; 26 | struct tm tm; 27 | SYSTEMTIME wtm; 28 | GetLocalTime(&wtm); 29 | tm.tm_year = wtm.wYear - 1900; 30 | tm.tm_mon = wtm.wMonth - 1; 31 | tm.tm_mday = wtm.wDay; 32 | tm.tm_hour = wtm.wHour; 33 | tm.tm_min = wtm.wMinute; 34 | tm.tm_sec = wtm.wSecond; 35 | tm.tm_isdst = -1; 36 | clock = mktime(&tm); 37 | tp->tv_sec = clock; 38 | tp->tv_usec = wtm.wMilliseconds * 1000; 39 | return 0; 40 | } 41 | inline void setBit(int& a, int pos, int flag)//置位为flag 42 | { 43 | if (flag) 44 | a |= (1 << pos); 45 | else 46 | a &= ~(1 << pos); 47 | } 48 | inline int getBit(int& a, int pos) 49 | { 50 | return (a >> pos) & 1; 51 | } 52 | void inputR() 53 | { 54 | ifstream infile("消元子.txt"); 55 | int temp; 56 | bool newLine = true; 57 | for (int i = 0; i < maxM; i++) 58 | rPos[i] = -1; 59 | while (infile >> temp) 60 | { 61 | if (newLine) 62 | { 63 | rPos[temp] = rNumNow; 64 | newLine = false; 65 | } 66 | int pos1, pos2; 67 | pos1 = temp / 32; 68 | pos2 = temp % 32; 69 | setBit(R[rNumNow][pos1], pos2, 1); 70 | infile.get(); 71 | if (infile.peek() == '\n') 72 | { 73 | infile.get(); 74 | rNumNow++; 75 | newLine = true; 76 | } 77 | } 78 | infile.close(); 79 | 80 | newLine = true; 81 | ifstream infil("被消元行.txt"); 82 | while (infil >> temp) 83 | { 84 | if (newLine) 85 | { 86 | ePos[eNumNow] = temp; 87 | newLine = false; 88 | } 89 | int pos1, pos2; 90 | pos1 = temp / 32; 91 | pos2 = temp % 32; 92 | setBit(E[eNumNow][pos1], pos2, 1); 93 | infil.get(); 94 | if (infil.peek() == '\n') 95 | { 96 | infil.get(); 97 | eNumNow++; 98 | newLine = true; 99 | } 100 | } 101 | eNumOrigin = eNumNow; 102 | infil.close(); 103 | //rPos[-1] = -1; 104 | } 105 | void outputE() 106 | { 107 | string dirName = "result.txt"; 108 | remove(dirName.c_str()); 109 | ofstream outfile; 110 | outfile.open(dirName, ios::app); 111 | for (int i = 0; i < eNumOrigin; i++) 112 | { 113 | if (!E[i][0] && ePos[i] <= 31) 114 | continue;//空行跳过 115 | 116 | for (int d = ePos[i] / 32; d >= 0; d--) 117 | { 118 | for (int j = 31; j >= 0; j--) 119 | { 120 | if (getBit(E[i][d], j)) 121 | outfile << j + d * 32 << ' '; 122 | } 123 | } 124 | outfile << endl; 125 | } 126 | } 127 | void eliminate() 128 | { 129 | for (int i = 0; i < eNumOrigin; i++) 130 | { 131 | while (rPos[ePos[i]] != -1) 132 | { 133 | int d = ePos[i] / 32, newEpos = -1; 134 | for (int j = d; j >= 0; j--) 135 | { 136 | E[i][j] ^= R[rPos[ePos[i]]][j]; 137 | if (newEpos == -1 && E[i][j] != 0)//更新Epos[i] 138 | {//未更新的条件为被消元行消为0 139 | for (int k = 31; k >= 0; k--) 140 | if (getBit(E[i][j], k)) 141 | { 142 | newEpos = 32 * j + k; 143 | break; 144 | } 145 | } 146 | } 147 | ePos[i] = newEpos; 148 | if (newEpos == -1)break;//该行消为空行 149 | } 150 | if (ePos[i] != -1) 151 | { 152 | rPos[ePos[i]] = rNumNow;//当空行时，该语句为rPos[-1]=rNumNow，需要跳过 153 | memcpy(R[rNumNow], E[i], sizeof(R[rNumNow]));//E[i]升级为消元子 154 | rNumNow++; eNumNow--; 155 | } 156 | } 157 | } 158 | void inputE() 159 | { 160 | int temp; 161 | bool newLine = true; 162 | ifstream infil("被消元行.txt"); 163 | for(int i=0;i<=inputERow&&eNum;++i,--eNum) 164 | { 165 | if (newLine) 166 | { 167 | ePos[eNumNow] = temp; 168 | newLine = false; 169 | } 170 | int pos1, pos2; 171 | pos1 = temp / 32; 172 | pos2 = temp % 32; 173 | setBit(E[eNumNow][pos1], pos2, 1); 174 | infil.get(); 175 | if (infil.peek() == '\n') 176 | { 177 | infil.get(); 178 | eNumNow++; 179 | newLine = true; 180 | } 181 | } 182 | eNumOrigin = eNumNow; 183 | infil.close(); 184 | } 185 | void bigScaleIO() 186 | { 187 | inputR(); 188 | for (int k = 0; k < eNum; k += inputERow) 189 | { 190 | inputE(); 191 | for (int i = k; i < inputERow&&i < eNum; i++) 192 | { 193 | while (rPos[ePos[i]] != -1) 194 | { 195 | int d = ePos[i] / 32, newEpos = -1; 196 | for (int j = d; j >= 0; j--) 197 | { 198 | E[i][j] ^= R[rPos[ePos[i]]][j]; 199 | if (newEpos == -1 && E[i][j] != 0)//更新Epos[i] 200 | {//未更新的条件为被消元行消为0 201 | for (int k = 31; k >= 0; k--) 202 | if (getBit(E[i][j], k)) 203 | { 204 | newEpos = 32 * j + k; 205 | break; 206 | } 207 | } 208 | } 209 | ePos[i] = newEpos; 210 | if (newEpos == -1)break;//该行消为空行 211 | } 212 | if (ePos[i] != -1) 213 | { 214 | rPos[ePos[i]] = rNumNow;//当空行时，该语句为rPos[-1]=rNumNow，需要跳过 215 | memcpy(R[rNumNow], E[i], sizeof(R[rNumNow]));//E[i]升级为消元子 216 | rNumNow++; eNumNow--; 217 | } 218 | } 219 | outputE(); 220 | } 221 | } 222 | int main() 223 | { 224 | timeval *tart = new timeval(); 225 | timeval *top = new timeval(); 226 | double urationTime = 0.0; 227 | 228 | timeval *start = new timeval(); 229 | timeval *stop = new timeval(); 230 | double durationTime = 0.0; 231 | 232 | gettimeofday(start, NULL); 233 | gettimeofday(tart, NULL); 234 | //eliminate(); 235 | bigScaleIO(); 236 | gettimeofday(top, NULL); 237 | gettimeofday(stop, NULL); 238 | //gettimeofday(stop, NULL); 239 | 240 | urationTime = top->tv_sec * 1000 + double(top->tv_usec) / 1000 - tart->tv_sec * 1000 - double(tart->tv_usec) / 1000; 241 | 242 | durationTime = stop->tv_sec * 1000 + double(stop->tv_usec) / 1000 - start->tv_sec * 1000 - double(start->tv_usec) / 1000; 243 | cout << " all time: " << double(durationTime) << " ms" << endl; 244 | cout << " SSEalgorithm time: " << double(urationTime) << " ms" << endl; 245 | cout << " io time: " << double(durationTime - urationTime) << " ms" << endl; 246 | 247 | return 0; 248 | } -------------------------------------------------------------------------------- /final/big_scale_IO2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #pragma comment(lib, "pthreadVC2.lib") 12 | using namespace std; 13 | const int maxN = 25000;//采用位图存储,消元子和消元行的个数之和 14 | const int maxM = 1000000;//矩阵列数 15 | const int inputERow = 10000; 16 | const int inputRRow = 10000; 17 | int eNum = 10000; 18 | const int rNum = 15000; 19 | int R[maxN][maxM / 32 + 1];//消元子 20 | int E[maxN][maxM / 32 + 1];//被消元行 21 | int rPos[maxM];//rPos[i]表示首项为i的消元行在R中第rPos行 22 | int ePos[maxN];//ePos[i]表示E第i行首项的位置 23 | int rNumNow, eNumNow, eNumOrigin; 24 | int gettimeofday(struct timeval *tp, void *tzp) 25 | { 26 | time_t clock; 27 | struct tm tm; 28 | SYSTEMTIME wtm; 29 | GetLocalTime(&wtm); 30 | tm.tm_year = wtm.wYear - 1900; 31 | tm.tm_mon = wtm.wMonth - 1; 32 | tm.tm_mday = wtm.wDay; 33 | tm.tm_hour = wtm.wHour; 34 | tm.tm_min = wtm.wMinute; 35 | tm.tm_sec = wtm.wSecond; 36 | tm.tm_isdst = -1; 37 | clock = mktime(&tm); 38 | tp->tv_sec = clock; 39 | tp->tv_usec = wtm.wMilliseconds * 1000; 40 | return 0; 41 | } 42 | inline void setBit(int& a, int pos, int flag)//置位为flag 43 | { 44 | if (flag) 45 | a |= (1 << pos); 46 | else 47 | a &= ~(1 << pos); 48 | } 49 | inline int getBit(int& a, int pos) 50 | { 51 | return (a >> pos) & 1; 52 | } 53 | void outputR() 54 | { 55 | string dirName = "result2.txt"; 56 | remove(dirName.c_str()); 57 | ofstream outfile; 58 | outfile.open(dirName, ios::app); 59 | for (int i = 0; i < eNumOrigin; i++) 60 | { 61 | if (!E[i][0] && ePos[i] <= 31) 62 | continue;//空行跳过 63 | 64 | for (int d = ePos[i] / 32; d >= 0; d--) 65 | { 66 | for (int j = 31; j >= 0; j--) 67 | { 68 | if (getBit(E[i][d], j)) 69 | outfile << j + d * 32 << ' '; 70 | } 71 | } 72 | outfile << endl; 73 | } 74 | } 75 | void inputR() 76 | { 77 | ifstream infile("消元子.txt"); 78 | int temp; 79 | bool newLine = true; 80 | for (int i = 0; i < maxM; i++) 81 | rPos[i] = -1; 82 | while (infile >> temp) 83 | { 84 | if (newLine) 85 | { 86 | rPos[temp] = rNumNow; 87 | newLine = false; 88 | } 89 | int pos1, pos2; 90 | pos1 = temp / 32; 91 | pos2 = temp % 32; 92 | setBit(R[rNumNow][pos1], pos2, 1); 93 | infile.get(); 94 | if (infile.peek() == '\n') 95 | { 96 | infile.get(); 97 | rNumNow++; 98 | newLine = true; 99 | } 100 | } 101 | infile.close(); 102 | 103 | newLine = true; 104 | ifstream infil("被消元行.txt"); 105 | while (infil >> temp) 106 | { 107 | if (newLine) 108 | { 109 | ePos[eNumNow] = temp; 110 | newLine = false; 111 | } 112 | int pos1, pos2; 113 | pos1 = temp / 32; 114 | pos2 = temp % 32; 115 | setBit(E[eNumNow][pos1], pos2, 1); 116 | infil.get(); 117 | if (infil.peek() == '\n') 118 | { 119 | infil.get(); 120 | eNumNow++; 121 | newLine = true; 122 | } 123 | } 124 | eNumOrigin = eNumNow; 125 | infil.close(); 126 | //rPos[-1] = -1; 127 | } 128 | void outputE() 129 | { 130 | string dirName = "result.txt"; 131 | remove(dirName.c_str()); 132 | ofstream outfile; 133 | outfile.open(dirName, ios::app); 134 | for (int i = 0; i < eNumOrigin; i++) 135 | { 136 | if (!E[i][0] && ePos[i] <= 31) 137 | continue;//空行跳过 138 | 139 | for (int d = ePos[i] / 32; d >= 0; d--) 140 | { 141 | for (int j = 31; j >= 0; j--) 142 | { 143 | if (getBit(E[i][d], j)) 144 | outfile << j + d * 32 << ' '; 145 | } 146 | } 147 | outfile << endl; 148 | } 149 | } 150 | void eliminate() 151 | { 152 | for (int i = 0; i < eNumOrigin; i++) 153 | { 154 | while (rPos[ePos[i]] != -1) 155 | { 156 | int d = ePos[i] / 32, newEpos = -1; 157 | for (int j = d; j >= 0; j--) 158 | { 159 | E[i][j] ^= R[rPos[ePos[i]]][j]; 160 | if (newEpos == -1 && E[i][j] != 0)//更新Epos[i] 161 | {//未更新的条件为被消元行消为0 162 | for (int k = 31; k >= 0; k--) 163 | if (getBit(E[i][j], k)) 164 | { 165 | newEpos = 32 * j + k; 166 | break; 167 | } 168 | } 169 | } 170 | ePos[i] = newEpos; 171 | if (newEpos == -1)break;//该行消为空行 172 | } 173 | if (ePos[i] != -1) 174 | { 175 | rPos[ePos[i]] = rNumNow;//当空行时，该语句为rPos[-1]=rNumNow，需要跳过 176 | memcpy(R[rNumNow], E[i], sizeof(R[rNumNow]));//E[i]升级为消元子 177 | rNumNow++; eNumNow--; 178 | } 179 | } 180 | } 181 | void inputE() 182 | { 183 | int temp; 184 | bool newLine = true; 185 | ifstream infil("被消元行.txt"); 186 | for(int i=0;i<=inputERow&&eNum;++i,--eNum) 187 | { 188 | if (newLine) 189 | { 190 | ePos[eNumNow] = temp; 191 | newLine = false; 192 | } 193 | int pos1, pos2; 194 | pos1 = temp / 32; 195 | pos2 = temp % 32; 196 | setBit(E[eNumNow][pos1], pos2, 1); 197 | infil.get(); 198 | if (infil.peek() == '\n') 199 | { 200 | infil.get(); 201 | eNumNow++; 202 | newLine = true; 203 | } 204 | } 205 | eNumOrigin = eNumNow; 206 | infil.close(); 207 | } 208 | void bigScaleIO2() 209 | { 210 | for (int k = 0; k < eNum; k += inputERow) 211 | { 212 | inputE(); 213 | for (int i = k; i < inputERow&&i < eNum; i++) 214 | { 215 | for (int t = 0; t < rNum; t += inputRRow) 216 | { 217 | inputR(); 218 | while (rPos[ePos[i]] != -1 && i <= t) 219 | { 220 | int d = ePos[i] / 32, newEpos = -1; 221 | for (int j = d; j >= 0; j--) 222 | { 223 | E[i][j] ^= R[rPos[ePos[i]]][j]; 224 | if (newEpos == -1 && E[i][j] != 0)//更新Epos[i] 225 | {//未更新的条件为被消元行消为0 226 | for (int k = 31; k >= 0; k--) 227 | if (getBit(E[i][j], k)) 228 | { 229 | newEpos = 32 * j + k; 230 | break; 231 | } 232 | } 233 | } 234 | ePos[i] = newEpos; 235 | if (newEpos == -1)break;//该行消为空行 236 | } 237 | if (ePos[i] != -1) 238 | { 239 | rPos[ePos[i]] = rNumNow;//当空行时，该语句为rPos[-1]=rNumNow，需要跳过 240 | memcpy(R[rNumNow], E[i], sizeof(R[rNumNow]));//E[i]升级为消元子 241 | rNumNow++; eNumNow--; 242 | } 243 | outputR(); 244 | } 245 | } 246 | outputE(); 247 | } 248 | } 249 | int main() 250 | { 251 | timeval *tart = new timeval(); 252 | timeval *top = new timeval(); 253 | double urationTime = 0.0; 254 | 255 | timeval *start = new timeval(); 256 | timeval *stop = new timeval(); 257 | double durationTime = 0.0; 258 | 259 | gettimeofday(start, NULL); 260 | gettimeofday(tart, NULL); 261 | //eliminate(); 262 | bigScaleIO2(); 263 | gettimeofday(top, NULL); 264 | gettimeofday(stop, NULL); 265 | //gettimeofday(stop, NULL); 266 | 267 | urationTime = top->tv_sec * 1000 + double(top->tv_usec) / 1000 - tart->tv_sec * 1000 - double(tart->tv_usec) / 1000; 268 | 269 | durationTime = stop->tv_sec * 1000 + double(stop->tv_usec) / 1000 - start->tv_sec * 1000 - double(start->tv_usec) / 1000; 270 | cout << " all time: " << double(durationTime) << " ms" << endl; 271 | cout << " SSEalgorithm time: " << double(urationTime) << " ms" << endl; 272 | cout << " io time: " << double(durationTime - urationTime) << " ms" << endl; 273 | 274 | return 0; 275 | } -------------------------------------------------------------------------------- /final/bitmap_store.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #pragma comment(lib, "pthreadVC2.lib") 12 | using namespace std; 13 | struct threadParam 14 | { 15 | int threadID, threadPos; 16 | }; 17 | const int maxN = 7000;//采用位图存储,消元子和消元行的个数之和 18 | const int maxM = 90000;//矩阵列数 19 | int R[maxN][maxM / 32 + 1];//消元子 20 | int E[maxN][maxM / 32 + 1];//被消元行 21 | int rPos[maxM];//rPos[i]表示首项为i的消元行在R中第rPos行 22 | int ePos[maxN];//ePos[i]表示E第i行首项的位置 23 | int rNumNow, eNumNow, eNumOrigin; 24 | const int threadCount = 1; 25 | pthread_t handle[threadCount]; 26 | sem_t semMain, semWorkstart[threadCount]; 27 | threadParam tp[threadCount]; 28 | int turn;//所有线程当前消去的轮次 29 | int s, e;//消去过程的起点终点 30 | int finish;//记录完成情况 31 | int gettimeofday(struct timeval *tp, void *tzp) 32 | { 33 | time_t clock; 34 | struct tm tm; 35 | SYSTEMTIME wtm; 36 | GetLocalTime(&wtm); 37 | tm.tm_year = wtm.wYear - 1900; 38 | tm.tm_mon = wtm.wMonth - 1; 39 | tm.tm_mday = wtm.wDay; 40 | tm.tm_hour = wtm.wHour; 41 | tm.tm_min = wtm.wMinute; 42 | tm.tm_sec = wtm.wSecond; 43 | tm.tm_isdst = -1; 44 | clock = mktime(&tm); 45 | tp->tv_sec = clock; 46 | tp->tv_usec = wtm.wMilliseconds * 1000; 47 | return 0; 48 | } 49 | inline void setBit(int& a, int pos, int flag)//置位为flag 50 | { 51 | if (flag) 52 | a |= (1 << pos); 53 | else 54 | a &= ~(1 << pos); 55 | } 56 | inline int getBit(int& a, int pos) 57 | { 58 | return (a >> pos) & 1; 59 | } 60 | void input() 61 | { 62 | ifstream infile("消元子.txt"); 63 | int temp; 64 | bool newLine = true; 65 | for (int i = 0; i < maxM; i++) 66 | rPos[i] = -1; 67 | while (infile >> temp) 68 | { 69 | if (newLine) 70 | { 71 | rPos[temp] = rNumNow; 72 | newLine = false; 73 | } 74 | int pos1, pos2; 75 | pos1 = temp / 32; 76 | pos2 = temp % 32; 77 | setBit(R[rNumNow][pos1], pos2, 1); 78 | infile.get(); 79 | if (infile.peek() == '\n') 80 | { 81 | infile.get(); 82 | rNumNow++; 83 | newLine = true; 84 | } 85 | } 86 | infile.close(); 87 | 88 | newLine = true; 89 | ifstream infil("被消元行.txt"); 90 | while (infil >> temp) 91 | { 92 | if (newLine) 93 | { 94 | ePos[eNumNow] = temp; 95 | newLine = false; 96 | } 97 | int pos1, pos2; 98 | pos1 = temp / 32; 99 | pos2 = temp % 32; 100 | setBit(E[eNumNow][pos1], pos2, 1); 101 | infil.get(); 102 | if (infil.peek() == '\n') 103 | { 104 | infil.get(); 105 | eNumNow++; 106 | newLine = true; 107 | } 108 | } 109 | eNumOrigin = eNumNow; 110 | infil.close(); 111 | //rPos[-1] = -1; 112 | } 113 | void output() 114 | { 115 | string dirName = "result.txt"; 116 | remove(dirName.c_str()); 117 | ofstream outfile; 118 | outfile.open(dirName, ios::app); 119 | outfile << "1************被消元行*************2" << endl; 120 | for (int i = 0; i < eNumOrigin; i++) 121 | { 122 | if (!E[i][0] && ePos[i] <= 31) 123 | continue;//空行跳过 124 | 125 | for (int d = ePos[i] / 32; d >= 0; d--) 126 | { 127 | for (int j = 31; j >= 0; j--) 128 | { 129 | if (getBit(E[i][d], j)) 130 | outfile << j + d * 32 << ' '; 131 | } 132 | } 133 | outfile << endl; 134 | } 135 | } 136 | void eliminate() 137 | { 138 | for (int i = 0; i < eNumOrigin; i++) 139 | { 140 | while (rPos[ePos[i]] != -1) 141 | { 142 | int d = ePos[i] / 32, newEpos = -1; 143 | for (int j = d; j >= 0; j--) 144 | { 145 | E[i][j] ^= R[rPos[ePos[i]]][j]; 146 | if (newEpos == -1 && E[i][j] != 0)//更新Epos[i] 147 | {//未更新的条件为被消元行消为0 148 | for (int k = 31; k >= 0; k--) 149 | if (getBit(E[i][j], k)) 150 | { 151 | newEpos = 32 * j + k; 152 | break; 153 | } 154 | } 155 | } 156 | ePos[i] = newEpos; 157 | if (newEpos == -1)break;//该行消为空行 158 | } 159 | if (ePos[i] != -1) 160 | { 161 | rPos[ePos[i]] = rNumNow;//当空行时，该语句为rPos[-1]=rNumNow，需要跳过 162 | memcpy(R[rNumNow], E[i], sizeof(R[rNumNow]));//E[i]升级为消元子 163 | rNumNow++; eNumNow--; 164 | } 165 | } 166 | } 167 | void SSEeliminate() 168 | { 169 | __m128i t0, t1, t3; 170 | for (int i = 0; i < eNumOrigin; i++) 171 | { 172 | while (rPos[ePos[i]] != -1) 173 | { 174 | int d = ePos[i] / 32, newEpos = -1; 175 | d += (4 - d % 4);//d设置为4倍数，防止越界 176 | for (int j = d - 4; j >= 0; j -= 4) 177 | { 178 | t0 = _mm_loadu_si128((__m128i*)(E[i] + j)); 179 | t1 = _mm_loadu_si128((__m128i*)(R[rPos[ePos[i]]] + j)); 180 | t1 = _mm_xor_si128(t0, t1); 181 | _mm_storeu_si128((__m128i*)(E[i] + j), t1); 182 | if (newEpos == -1 && E[i][j] != 0)//更新Epos[i] 183 | {//未更新的条件为被消元行消为0 184 | t0 = _mm_xor_si128(t0, t0); 185 | t3 = _mm_cmpeq_epi16(t1, t0);//判断是否全部消为零 186 | int eq[4]; 187 | _mm_storeu_si128((__m128i*)(eq), t3); 188 | for (int m = 3; m >= 0; m--) 189 | { 190 | if (eq[m] != -1 && newEpos == -1) 191 | { 192 | for (int k = 31; k >= 0; k--) 193 | if (getBit(E[i][j + m], k)) 194 | { 195 | newEpos = 32 * (j + m) + k; 196 | break; 197 | } 198 | } 199 | } 200 | } 201 | } 202 | ePos[i] = newEpos; 203 | if (newEpos == -1)break;//该行消为空行 204 | } 205 | if (ePos[i] != -1) 206 | { 207 | rPos[ePos[i]] = rNumNow;//当空行时，该语句为rPos[-1]=rNumNow，需要跳过 208 | memcpy(R[rNumNow], E[i], sizeof(R[rNumNow]));//E[i]升级为消元子 209 | rNumNow++; eNumNow--; 210 | } 211 | } 212 | } 213 | void* pthreadFunction(void* param) 214 | { 215 | threadParam* p = (threadParam*)param; 216 | int id = p->threadID; 217 | while (1) 218 | { 219 | sem_wait(&semWorkstart[id]); 220 | int threadTurn = id + turn; 221 | if (threadTurn >= eNumOrigin) 222 | { 223 | setBit(finish, id, 0); 224 | return NULL; 225 | } 226 | while (ePos[threadTurn] >= e && ePos[threadTurn] <= s && rPos[ePos[threadTurn]] != -1) 227 | { 228 | int d = ePos[threadTurn] / 32, newEpos = -1; 229 | for (int j = d; j >= 0; j--) 230 | { 231 | E[threadTurn][j] ^= R[rPos[ePos[threadTurn]]][j]; 232 | if (newEpos == -1 && E[threadTurn][j] != 0)//更新Epos[i] 233 | {//未更新的条件为被消元行消为0 234 | for (int k = 31; k >= 0; k--) 235 | if (getBit(E[threadTurn][j], k)) 236 | { 237 | newEpos = 32 * j + k; 238 | break; 239 | } 240 | } 241 | } 242 | ePos[threadTurn] = newEpos; 243 | if (newEpos = -1)break; 244 | } 245 | sem_post(&semMain); 246 | //p->threadPos = newEpos; 247 | } 248 | } 249 | void pthreadEliminateImproved() 250 | { 251 | sem_init(&semMain, 0, 0); 252 | for (int id = 0; id < threadCount; id++) 253 | sem_init(&semWorkstart[id], 0, 0); 254 | for (int id = 0; id < threadCount; id++) 255 | { 256 | tp[id].threadID = id; 257 | pthread_create(&handle[id], NULL, pthreadFunction, &tp[id]); 258 | } 259 | for (turn = 0; turn < eNumOrigin; turn += threadCount) 260 | { 261 | finish = pow(2, threadCount) - 1;//采用位来记录是否完成 finish=32'b0000...0000是完成本轮消去,循环结束 262 | while (finish) 263 | { 264 | e = -1, s = -1;//本次消元操作的起点与终点 265 | for (int id = 0; id < threadCount; id++) 266 | if (getBit(finish, id)) 267 | s = max(ePos[turn + id], s); 268 | if (s == -1)break; 269 | e = s; 270 | while (rPos[--e] != -1); e += 1; 271 | //s到e为本次消去的区间 272 | for (int id = 0; id < threadCount; id++) 273 | if (getBit(finish, id)) 274 | sem_post(&semWorkstart[id]); 275 | for (int id = 0; id < threadCount; id++) 276 | if (getBit(finish, id)) 277 | sem_wait(&semMain); 278 | for (int id = 0; id < threadCount; id++)//处理有数据依赖的部分 279 | { 280 | int threadTurn = id + turn; 281 | if (getBit(finish, id) && rPos[ePos[threadTurn]] == -1) 282 | { 283 | setBit(finish, id, 0); 284 | rPos[ePos[threadTurn]] = rNumNow; 285 | memcpy(R[rNumNow], E[threadTurn], sizeof(R[rNumNow]));//E[i]升级为消元子 286 | rNumNow++; eNumNow--; 287 | } 288 | } 289 | //if (newEpos == -1)break;//该行消为空行 290 | } 291 | } 292 | for (int id = 0; id < threadCount; id++) 293 | sem_post(&semWorkstart[id]); 294 | 295 | for (int id = 0; id < threadCount; id++) 296 | pthread_join(handle[id], NULL); 297 | 298 | sem_destroy(&semMain); 299 | for (int id = 0; id < threadCount; id++) 300 | sem_destroy(&semWorkstart[id]); 301 | } 302 | int main() 303 | { 304 | timeval *tart = new timeval(); 305 | timeval *top = new timeval(); 306 | double urationTime = 0.0; 307 | 308 | timeval *start = new timeval(); 309 | timeval *stop = new timeval(); 310 | double durationTime = 0.0; 311 | 312 | gettimeofday(start, NULL); 313 | input(); 314 | gettimeofday(tart, NULL); 315 | eliminate(); 316 | //pthreadEliminateImproved(); 317 | gettimeofday(top, NULL); 318 | gettimeofday(stop, NULL); 319 | output(); 320 | //gettimeofday(stop, NULL); 321 | 322 | urationTime = top->tv_sec * 1000 + double(top->tv_usec) / 1000 - tart->tv_sec * 1000 - double(tart->tv_usec) / 1000; 323 | 324 | durationTime = stop->tv_sec * 1000 + double(stop->tv_usec) / 1000 - start->tv_sec * 1000 - double(start->tv_usec) / 1000; 325 | cout << " all time: " << double(durationTime) << " ms" << endl; 326 | cout << " algorithm time: " << double(urationTime) << " ms" << endl; 327 | cout << " io time: " << double(durationTime - urationTime) << " ms" << endl; 328 | 329 | return 0; 330 | } -------------------------------------------------------------------------------- /final/sparse_store.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | using namespace std; 8 | const int maxN = 30000;//最大消元子被消元子个数 9 | const int maxM = 8000;//向量的最大长度 10 | int E[maxN][maxM], R[maxN][maxM]; 11 | int eLen[maxN], rLen[maxN];//E[i]代表E第i行的元素数量,例：E[i]这一行仅E[i][0]有元素，eLen[i]=1; 12 | int rPos[maxN];//rPos[i]表示首项为i的消元行在R中第rPos行 13 | int rNumNow, eNumNow, eNumOrigin; 14 | int gettimeofday(struct timeval *tp, void *tzp) 15 | { 16 | time_t clock; 17 | struct tm tm; 18 | SYSTEMTIME wtm; 19 | GetLocalTime(&wtm); 20 | tm.tm_year = wtm.wYear - 1900; 21 | tm.tm_mon = wtm.wMonth - 1; 22 | tm.tm_mday = wtm.wDay; 23 | tm.tm_hour = wtm.wHour; 24 | tm.tm_min = wtm.wMinute; 25 | tm.tm_sec = wtm.wSecond; 26 | tm.tm_isdst = -1; 27 | clock = mktime(&tm); 28 | tp->tv_sec = clock; 29 | tp->tv_usec = wtm.wMilliseconds * 1000; 30 | return 0; 31 | } 32 | void input() 33 | { 34 | ifstream infile("消元子.txt"); 35 | int temp; 36 | bool newLine = true; 37 | for (int i = 0; i < maxN; i++) 38 | rPos[i] = -1; 39 | int count; 40 | while (infile >> temp) 41 | { 42 | if (newLine) 43 | { 44 | count = 0; 45 | rPos[temp] = rNumNow; 46 | newLine = false; 47 | } 48 | R[rNumNow][count++] = temp; 49 | infile.get(); 50 | if (infile.peek() == '\n') 51 | { 52 | rLen[rNumNow++] = count; 53 | infile.get(); 54 | newLine = true; 55 | } 56 | } 57 | infile.close(); 58 | 59 | newLine = true; 60 | ifstream infil("被消元行.txt"); 61 | while (infil >> temp) 62 | { 63 | if (newLine) 64 | { 65 | count = 0; 66 | newLine = false; 67 | } 68 | E[eNumNow][count++] = temp; 69 | infil.get(); 70 | if (infil.peek() == '\n') 71 | { 72 | eLen[eNumNow++] = count; 73 | infil.get(); 74 | newLine = true; 75 | } 76 | } 77 | eNumOrigin = eNumNow; 78 | infil.close(); 79 | } 80 | void output() 81 | {//当每行遇到零时停止输出忽视那些含有零的消元结果 82 | string dirName = "result.txt"; 83 | remove(dirName.c_str()); 84 | ofstream outfile; 85 | outfile.open(dirName, ios::app); 86 | outfile << "1************被消元行*************2" << endl; 87 | for (int i = 0; i < eNumOrigin; i++) 88 | { 89 | for (int d = 0; d < eLen[i]; d++) 90 | outfile << E[i][d] << ' '; 91 | outfile << endl; 92 | } 93 | } 94 | void eliminate() 95 | { 96 | for (int i = 0; i < eNumOrigin; i++) 97 | { 98 | int eTemp[maxM] = { 0 }, eNew[maxM] = { 0 };//本轮消去中用来取代旧的被消元行 99 | memcpy(eTemp, E[i], eLen[i] * sizeof(int)); 100 | int len = eLen[i], k; 101 | while (len && rPos[eTemp[0]] != -1) 102 | { 103 | k = 1, len = 0; 104 | int p = rPos[eTemp[0]], q = rLen[rPos[eTemp[0]]]; 105 | for (int j = 1; j < eLen[i] || k < q; j++) 106 | { 107 | while (R[p][k] > eTemp[j]) 108 | eNew[len++] = R[p][k++]; 109 | 110 | if (R[p][k] == eTemp[j]) 111 | k++; 112 | else// if (R[p][k] < eTemp[j]) 113 | eNew[len++] = eTemp[j]; 114 | } 115 | memset(eTemp, 0, eLen[i] * sizeof(int));//很重要，否则有可能留有残存的数据 116 | memcpy(eTemp, eNew, len * sizeof(int)); 117 | eLen[i] = len; 118 | 119 | } 120 | if (len != 0)//空行跳过 121 | { 122 | memset(E[i], 0, eLen[i] * sizeof(int)); 123 | eLen[i] = rLen[rNumNow] = len; 124 | memcpy(E[i], eNew, len * sizeof(int)); 125 | rPos[E[i][0]] = rNumNow; 126 | memcpy(R[rNumNow], E[i], len * sizeof(int)); 127 | rNumNow++; eNumNow--; 128 | } 129 | } 130 | } 131 | int main() 132 | { 133 | timeval *tart = new timeval(); 134 | timeval *top = new timeval(); 135 | double urationTime = 0.0; 136 | 137 | timeval *start = new timeval(); 138 | timeval *stop = new timeval(); 139 | double durationTime = 0.0; 140 | 141 | gettimeofday(start, NULL); 142 | input(); 143 | gettimeofday(tart, NULL); 144 | eliminate(); 145 | gettimeofday(top, NULL); 146 | gettimeofday(stop, NULL); 147 | output(); 148 | //gettimeofday(stop, NULL); 149 | 150 | urationTime = top->tv_sec * 1000 + double(top->tv_usec) / 1000 - tart->tv_sec * 1000 - double(tart->tv_usec) / 1000; 151 | 152 | durationTime = stop->tv_sec * 1000 + double(stop->tv_usec) / 1000 - start->tv_sec * 1000 - double(start->tv_usec) / 1000; 153 | cout << " all time: " << double(durationTime) << " ms" << endl; 154 | cout << " algorithm time: " << double(urationTime) << " ms" << endl; 155 | cout << " io time: " << double(durationTime - urationTime) << " ms" << endl; 156 | 157 | return 0; 158 | } -------------------------------------------------------------------------------- /final/wrong_pthread.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #pragma comment(lib, "pthreadVC2.lib") 9 | using namespace std; 10 | struct threadParam 11 | { 12 | int threadID, threadPos; 13 | }; 14 | const int maxN = 10000;//采用位图存储,消元子和消元行的个数之和 15 | const int maxM = 10000;//矩阵列数 16 | int R[maxN][maxM / 32 + 1];//消元子 17 | int E[maxN][maxM / 32 + 1];//被消元行 18 | int rPos[maxM];//rPos[i]表示首项为i的消元行在R中第rPos行 19 | int ePos[maxN];//ePos[i]表示E第i行首项的位置 20 | int rNumNow, eNumNow, eNumOrigin; 21 | const int threadCount = 1; 22 | pthread_t handle[threadCount]; 23 | sem_t semMain, semWorkstart[threadCount]; 24 | threadParam tp[threadCount]; 25 | int turn;//所有线程当前消去的轮次 26 | int s, e;//消去过程的起点终点 27 | int finish;//记录完成情况 28 | inline void setBit(int& a, int pos, int flag)//置位为flag 29 | { 30 | if (flag) 31 | a |= (1 << pos); 32 | else 33 | a &= ~(1 << pos); 34 | } 35 | inline int getBit(int& a, int pos) 36 | { 37 | return (a >> pos) & 1; 38 | } 39 | void input() 40 | { 41 | ifstream infile("消元子.txt"); 42 | int temp; 43 | bool newLine = true; 44 | for (int i = 0; i < maxM; i++) 45 | rPos[i] = -1; 46 | while (infile >> temp) 47 | { 48 | if (newLine) 49 | { 50 | rPos[temp] = rNumNow; 51 | newLine = false; 52 | } 53 | int pos1, pos2; 54 | pos1 = temp / 32; 55 | pos2 = temp % 32; 56 | setBit(R[rNumNow][pos1], pos2, 1); 57 | infile.get(); 58 | if (infile.peek() == '\n') 59 | { 60 | infile.get(); 61 | rNumNow++; 62 | newLine = true; 63 | } 64 | } 65 | infile.close(); 66 | 67 | newLine = true; 68 | ifstream infil("被消元行.txt"); 69 | while (infil >> temp) 70 | { 71 | if (newLine) 72 | { 73 | ePos[eNumNow] = temp; 74 | newLine = false; 75 | } 76 | int pos1, pos2; 77 | pos1 = temp / 32; 78 | pos2 = temp % 32; 79 | setBit(E[eNumNow][pos1], pos2, 1); 80 | infil.get(); 81 | if (infil.peek() == '\n') 82 | { 83 | infil.get(); 84 | eNumNow++; 85 | newLine = true; 86 | } 87 | } 88 | eNumOrigin = eNumNow; 89 | infil.close(); 90 | } 91 | void output() 92 | { 93 | string dirName = "result.txt"; 94 | remove(dirName.c_str()); 95 | ofstream outfile; 96 | outfile.open(dirName, ios::app); 97 | outfile << "1************被消元行*************0" << endl; 98 | for (int i = 0; i < eNumOrigin; i++) 99 | { 100 | if (!E[i][0] && ePos[i] <= 31) 101 | continue;//空行跳过 102 | 103 | for (int d = ePos[i] / 32; d >= 0; d--) 104 | { 105 | for (int j = 31; j >= 0; j--) 106 | { 107 | if (getBit(E[i][d], j)) 108 | outfile << j + d * 32 << ' '; 109 | } 110 | } 111 | outfile << endl; 112 | } 113 | } 114 | void eliminate() 115 | { 116 | for (int i = 0; i < eNumOrigin; i++) 117 | { 118 | while (rPos[ePos[i]] != -1) 119 | { 120 | int d = ePos[i] / 32, newEpos = -1; 121 | for (int j = d; j >= 0; j--) 122 | { 123 | E[i][j] ^= R[rPos[ePos[i]]][j]; 124 | if (newEpos == -1 && E[i][j] != 0)//更新Epos[i] 125 | {//未更新的条件为被消元行消为0 126 | for (int k = 31; k >= 0; k--) 127 | if (getBit(E[i][j], k)) 128 | { 129 | newEpos = 32 * j + k; 130 | break; 131 | } 132 | } 133 | } 134 | ePos[i] = newEpos; 135 | if (newEpos == -1)break;//该行消为空行 136 | } 137 | rPos[ePos[i]] = rNumNow; 138 | memcpy(R[rNumNow], E[i], sizeof(R[rNumNow]));//E[i]升级为消元子 139 | rNumNow++; eNumNow--; 140 | } 141 | } 142 | void* pthreadFunction(void* param) 143 | { 144 | threadParam* p = (threadParam*)param; 145 | int id = p->threadID; 146 | while (1) 147 | { 148 | sem_wait(&semWorkstart[id]); 149 | int threadTurn = id + turn; 150 | if (threadTurn > eNumOrigin) 151 | return NULL; 152 | //距s最近的k的倍数且大于s的数位置 s+(k-s%k)=s+k-s%k 153 | //距s最近的k的倍数且小于s的数的位置 s-s%k 154 | //当s%k==0时，所求得的区间为[s,s+k]也即右半区间 155 | 156 | //距s最近的模k得t且大于s的数位置 s+(k-s%k)-(k-t)=s+t-s%k 157 | //距s最近的模k得t且小于s的数位置 s-(s%k+k-t)=s-k+t-s%k 158 | //当s%k==t时，上述方法求得左半区间[s-k,s] 159 | int ss = s - 1 - s % 32, ee = e + 31 - e % 32; 160 | while (ePos[threadTurn] >= e && ePos[threadTurn] <= s && rPos[ePos[threadTurn]] != -1) 161 | { 162 | int newEpos = -1, flag = 1; 163 | for (int j = s; j > ss; j--)//j>=ss+1 164 | { 165 | int t1 = j / 32, t2 = j % 32; 166 | setBit(E[threadTurn][t1], t2, getBit(E[threadTurn][t1], t2) ^ getBit(R[rPos[ePos[threadTurn]]][t1], t2)); 167 | if (flag&&getBit(E[threadTurn][t1], t2))//更新Epos[i] 168 | {//未更新的条件为被消元行消为0 169 | newEpos = 32 * t1 + t2; 170 | flag = 0; 171 | } 172 | } 173 | int ds = ss / 32, de = ee / 32 + 1; 174 | for (int j = ds; j >= de; j--) 175 | { 176 | E[threadTurn][j] ^= R[rPos[ePos[threadTurn]]][j]; 177 | if (newEpos == -1 && E[threadTurn][j] != 0)//更新Epos[i] 178 | {//未更新的条件为被消元行消为0 179 | for (int k = 31; k >= 0; k--) 180 | if (getBit(E[threadTurn][j], k)) 181 | { 182 | newEpos = 32 * j + k; 183 | break; 184 | } 185 | } 186 | } 187 | for (int j = ee; j >= e; j--)//处理的区间均为[) 188 | { 189 | int t1 = j / 32, t2 = j % 32; 190 | setBit(E[threadTurn][t1], t2, getBit(E[threadTurn][t1], t2) ^ getBit(R[rPos[ePos[threadTurn]]][t1], t2)); 191 | if (flag&&getBit(E[threadTurn][t1], t2))//更新Epos[i] 192 | {//未更新的条件为被消元行消为0 193 | newEpos = 32 * t1 + t2; 194 | flag = 0; 195 | } 196 | } 197 | ePos[threadTurn] = newEpos; 198 | int m = 0; 199 | } 200 | //if (rPos[ePos[threadTurn]] == -1) 201 | //{ 202 | // setBit(finish, id, 0); 203 | // rPos[ePos[threadTurn]] = rNumNow; 204 | // memcpy(R[rNumNow], E[threadTurn], sizeof(R[rNumNow]));//E[i]升级为消元子 205 | // rNumNow++; eNumNow--;//可能出现数据竞争需修改 206 | //} 207 | sem_post(&semMain); 208 | //p->threadPos = newEpos; 209 | } 210 | } 211 | void pthreadEliminateImproved() 212 | { 213 | sem_init(&semMain, 0, 0); 214 | for (int id = 0; id < threadCount; id++) 215 | sem_init(&semWorkstart[id], 0, 0); 216 | for (int id = 0; id < threadCount; id++) 217 | { 218 | tp[id].threadID = id; 219 | pthread_create(&handle[id], NULL, pthreadFunction, &tp[id]); 220 | } 221 | for (int turn = 0; turn < eNumOrigin; turn += threadCount) 222 | { 223 | finish = pow(2, threadCount) - 1;//采用位来记录是否完成 finish=32'b0000...0000是完成本轮消去,循环结束 224 | while (finish) 225 | { 226 | e = -1, s = -1;//本次消元操作的起点与终点 227 | for (int id = 0; id < threadCount; id++) 228 | if (getBit(finish, id)) 229 | s = max(ePos[turn + id], s); 230 | e = s; 231 | while (rPos[--e] != -1); e += 1; 232 | //s到e为本次消去的区间 233 | for (int id = 0; id < threadCount; id++) 234 | if (getBit(finish, id)) 235 | sem_post(&semWorkstart[id]); 236 | for (int id = 0; id < threadCount; id++) 237 | if (getBit(finish, id)) 238 | sem_wait(&semMain); 239 | for (int id = 0; id < threadCount; id++) 240 | { 241 | int threadTurn = id + turn; 242 | if (getBit(finish, id)&&rPos[ePos[threadTurn]] == -1) 243 | { 244 | setBit(finish, id, 0); 245 | rPos[ePos[threadTurn]] = rNumNow; 246 | memcpy(R[rNumNow], E[id + turn], sizeof(R[rNumNow]));//E[i]升级为消元子 247 | rNumNow++; eNumNow--; 248 | } 249 | } 250 | //if (newEpos == -1)break;//该行消为空行 251 | } 252 | 253 | } 254 | for (int id = 0; id < threadCount; id++) 255 | sem_post(&semWorkstart[id]); 256 | for (int id = 0; id < threadCount; id++) 257 | pthread_join(handle[id], NULL); 258 | 259 | sem_destroy(&semMain); 260 | for (int id = 0; id < threadCount; id++) 261 | sem_destroy(&semWorkstart[id]); 262 | } 263 | int main() 264 | { 265 | input(); 266 | pthreadEliminateImproved(); 267 | //eliminate(); 268 | output(); 269 | return 0; 270 | } -------------------------------------------------------------------------------- /final/wrong_pthread_improved.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #pragma comment(lib, "pthreadVC2.lib") 9 | using namespace std; 10 | struct threadParam 11 | { 12 | int threadID, threadPos; 13 | }; 14 | const int maxN = 10000;//采用位图存储,消元子和消元行的个数之和 15 | const int maxM = 10000;//矩阵列数 16 | int R[maxN][maxM / 32 + 1];//消元子 17 | int E[maxN][maxM / 32 + 1];//被消元行 18 | int rPos[maxM];//rPos[i]表示首项为i的消元行在R中第rPos行 19 | int ePos[maxN];//ePos[i]表示E第i行首项的位置 20 | int rNumNow, eNumNow, eNumOrigin; 21 | const int threadCount = 3; 22 | pthread_t handle[threadCount]; 23 | sem_t semMain, semWorkstart[threadCount]; 24 | threadParam tp[threadCount]; 25 | int turn;//所有线程当前消去的轮次 26 | int s, e;//消去过程的起点终点 27 | int finish;//记录完成情况 28 | inline void setBit(int& a, int pos, int flag)//置位为flag 29 | { 30 | if (flag) 31 | a |= (1 << pos); 32 | else 33 | a &= ~(1 << pos); 34 | } 35 | inline int getBit(int& a, int pos) 36 | { 37 | return (a >> pos) & 1; 38 | } 39 | void input() 40 | { 41 | ifstream infile("消元子.txt"); 42 | int temp; 43 | bool newLine = true; 44 | for (int i = 0; i < maxM; i++) 45 | rPos[i] = -1; 46 | while (infile >> temp) 47 | { 48 | if (newLine) 49 | { 50 | rPos[temp] = rNumNow; 51 | newLine = false; 52 | } 53 | int pos1, pos2; 54 | pos1 = temp / 32; 55 | pos2 = temp % 32; 56 | setBit(R[rNumNow][pos1], pos2, 1); 57 | infile.get(); 58 | if (infile.peek() == '\n') 59 | { 60 | infile.get(); 61 | rNumNow++; 62 | newLine = true; 63 | } 64 | } 65 | infile.close(); 66 | 67 | newLine = true; 68 | ifstream infil("被消元行.txt"); 69 | while (infil >> temp) 70 | { 71 | if (newLine) 72 | { 73 | ePos[eNumNow] = temp; 74 | newLine = false; 75 | } 76 | int pos1, pos2; 77 | pos1 = temp / 32; 78 | pos2 = temp % 32; 79 | setBit(E[eNumNow][pos1], pos2, 1); 80 | infil.get(); 81 | if (infil.peek() == '\n') 82 | { 83 | infil.get(); 84 | eNumNow++; 85 | newLine = true; 86 | } 87 | } 88 | eNumOrigin = eNumNow; 89 | infil.close(); 90 | } 91 | void output() 92 | { 93 | string dirName = "result.txt"; 94 | remove(dirName.c_str()); 95 | ofstream outfile; 96 | outfile.open(dirName, ios::app); 97 | outfile << "1************被消元行*************1" << endl; 98 | for (int i = 0; i < eNumOrigin; i++) 99 | { 100 | if (!E[i][0] && ePos[i] <= 31) 101 | continue;//空行跳过 102 | 103 | for (int d = ePos[i] / 32; d >= 0; d--) 104 | { 105 | for (int j = 31; j >= 0; j--) 106 | { 107 | if (getBit(E[i][d], j)) 108 | outfile << j + d * 32 << ' '; 109 | } 110 | } 111 | outfile << endl; 112 | } 113 | } 114 | void eliminate() 115 | { 116 | for (int i = 0; i < eNumOrigin; i++) 117 | { 118 | while (rPos[ePos[i]] != -1) 119 | { 120 | int d = ePos[i] / 32, newEpos = -1; 121 | for (int j = d; j >= 0; j--) 122 | { 123 | E[i][j] ^= R[rPos[ePos[i]]][j]; 124 | if (newEpos == -1 && E[i][j] != 0)//更新Epos[i] 125 | {//未更新的条件为被消元行消为0 126 | for (int k = 31; k >= 0; k--) 127 | if (getBit(E[i][j], k)) 128 | { 129 | newEpos = 32 * j + k; 130 | break; 131 | } 132 | } 133 | } 134 | ePos[i] = newEpos; 135 | if (newEpos == -1)break;//该行消为空行 136 | } 137 | rPos[ePos[i]] = rNumNow; 138 | memcpy(R[rNumNow], E[i], sizeof(R[rNumNow]));//E[i]升级为消元子 139 | rNumNow++; eNumNow--; 140 | } 141 | } 142 | void* pthreadFunction(void* param) 143 | { 144 | threadParam* p = (threadParam*)param; 145 | int id = p->threadID; 146 | while (1) 147 | { 148 | sem_wait(&semWorkstart[id]); 149 | int threadTurn = id + turn; 150 | if (threadTurn >= eNumOrigin) 151 | return NULL; 152 | //距s最近的k的倍数且大于s的数位置 s+(k-s%k)=s+k-s%k 153 | //距s最近的k的倍数且小于s的数的位置 s-s%k 154 | //当s%k==0时，所求得的区间为[s,s+k]也即右半区间 155 | 156 | //距s最近的模k得t且大于s的数位置 s+(k-s%k)-(k-t)=s+t-s%k 157 | //距s最近的模k得t且小于s的数位置 s-(s%k+k-t)=s-k+t-s%k 158 | //当s%k==t时，上述方法求得左半区间[s-k,s] 159 | while (ePos[threadTurn] >= e && ePos[threadTurn] <= s && rPos[ePos[threadTurn]] != -1) 160 | { 161 | int d = ePos[threadTurn] / 32, newEpos = -1; 162 | for (int j = d; j >= 0; j--) 163 | { 164 | E[threadTurn][j] ^= R[rPos[ePos[threadTurn]]][j]; 165 | if (newEpos == -1 && E[threadTurn][j] != 0)//更新Epos[i] 166 | {//未更新的条件为被消元行消为0 167 | for (int k = 31; k >= 0; k--) 168 | if (getBit(E[threadTurn][j], k)) 169 | { 170 | newEpos = 32 * j + k; 171 | break; 172 | } 173 | } 174 | } 175 | ePos[threadTurn] = newEpos; 176 | } 177 | sem_post(&semMain); 178 | //p->threadPos = newEpos; 179 | } 180 | } 181 | void pthreadEliminateImproved() 182 | { 183 | sem_init(&semMain, 0, 0); 184 | for (int id = 0; id < threadCount; id++) 185 | sem_init(&semWorkstart[id], 0, 0); 186 | for (int id = 0; id < threadCount; id++) 187 | { 188 | tp[id].threadID = id; 189 | pthread_create(&handle[id], NULL, pthreadFunction, &tp[id]); 190 | } 191 | for (turn = 0; turn < eNumOrigin; turn += threadCount) 192 | { 193 | finish = pow(2, threadCount) - 1;//采用位来记录是否完成 finish=32'b0000...0000是完成本轮消去,循环结束 194 | while (finish) 195 | { 196 | e = -1, s = -1;//本次消元操作的起点与终点 197 | for (int id = 0; id < threadCount; id++) 198 | if (getBit(finish, id)) 199 | s = max(ePos[turn + id], s); 200 | e = s; 201 | while (rPos[--e] != -1); e += 1; 202 | //s到e为本次消去的区间 203 | for (int id = 0; id < threadCount; id++) 204 | if (getBit(finish, id)) 205 | sem_post(&semWorkstart[id]); 206 | for (int id = 0; id < threadCount; id++) 207 | if (getBit(finish, id)) 208 | sem_wait(&semMain); 209 | for (int id = 0; id < threadCount; id++)//处理有数据依赖的部分 210 | { 211 | int threadTurn = id + turn; 212 | if (getBit(finish, id)&&rPos[ePos[threadTurn]] == -1) 213 | { 214 | setBit(finish, id, 0); 215 | rPos[ePos[threadTurn]] = rNumNow; 216 | memcpy(R[rNumNow], E[id + turn], sizeof(R[rNumNow]));//E[i]升级为消元子 217 | rNumNow++; eNumNow--; 218 | } 219 | } 220 | //if (newEpos == -1)break;//该行消为空行 221 | } 222 | } 223 | for (int id = 0; id < threadCount; id++) 224 | sem_post(&semWorkstart[id]); 225 | 226 | for (int id = 0; id < threadCount; id++) 227 | pthread_join(handle[id], NULL); 228 | 229 | sem_destroy(&semMain); 230 | for (int id = 0; id < threadCount; id++) 231 | sem_destroy(&semWorkstart[id]); 232 | } 233 | int main() 234 | { 235 | input(); 236 | pthreadEliminateImproved(); 237 | //eliminate(); 238 | output(); 239 | return 0; 240 | } -------------------------------------------------------------------------------- /homework1/main1.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | using namespace std; 6 | int size =1000000; 7 | double *a; 8 | double sum=0; 9 | timeval *start; 10 | timeval *stop; 11 | void init() 12 | { 13 | a=new double[size]; 14 | for(int i=0;i 1; m /= 2) 32 | for (int i = 0; i < m / 2; i++) 33 | a[i ] = a[i * 2] + a[i * 2 + 1]; 34 | sum=a[0]; 35 | } 36 | void optimizationAlgorithm2(int n) 37 | { 38 | if (n == 1) 39 | return; 40 | else 41 | { 42 | for (int i = 0; i < n / 2; i++) 43 | a[i]+=a[n-i-1]; 44 | n = n / 2; 45 | optimizationAlgorithm2(n); 46 | } 47 | } 48 | void optimizationAlgorithm3() 49 | { 50 | int sum1 = 0, sum2 = 0; 51 | for (int i = 0;i < size; i += 2) { 52 | sum1 += a[i]; 53 | sum2 += a[i + 1]; 54 | } 55 | sum = sum1 + sum2; 56 | } 57 | int main() 58 | { 59 | timeval *start=new timeval(); 60 | timeval *stop=new timeval(); 61 | double durationTime=0.0; 62 | 63 | init(); 64 | gettimeofday(start,NULL); 65 | trivialAlgorithm(); 66 | gettimeofday(stop,NULL); 67 | destroy1(); 68 | durationTime =stop->tv_sec*1000+double(stop->tv_usec)/1000-start->tv_sec*1000-double(start->tv_usec)/1000; 69 | cout << " trivialAlgorithm time: " << double(durationTime) << " ms" << endl; 70 | 71 | init(); 72 | gettimeofday(start,NULL); 73 | optimizationAlgorithm1(); 74 | gettimeofday(stop,NULL); 75 | destroy1(); 76 | durationTime =stop->tv_sec*1000+double(stop->tv_usec)/1000-start->tv_sec*1000-double(start->tv_usec)/1000; 77 | cout << " optimizationAlgorithm1 time: " << double(durationTime) << " ms" << endl; 78 | 79 | init(); 80 | gettimeofday(start,NULL); 81 | optimizationAlgorithm2(size-1); 82 | gettimeofday(stop,NULL); 83 | destroy1(); 84 | durationTime =stop->tv_sec*1000+double(stop->tv_usec)/1000-start->tv_sec*1000-double(start->tv_usec)/1000; 85 | cout << " optimizationAlgorithm1 time: " << double(durationTime) << " ms" << endl; 86 | 87 | init(); 88 | gettimeofday(start,NULL); 89 | optimizationAlgorithm3(); 90 | gettimeofday(stop,NULL); 91 | destroy1(); 92 | durationTime =stop->tv_sec*1000+double(stop->tv_usec)/1000-start->tv_sec*1000-double(start->tv_usec)/1000; 93 | cout << " optimizationAlgorithm1 time: " << double(durationTime) << " ms" << endl; 94 | 95 | 96 | 97 | return 0; 98 | } 99 | -------------------------------------------------------------------------------- /homework1/main2.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | using namespace std; 4 | const int size=4000; 5 | int matrix[size][size]; 6 | int b[size]; 7 | int sum[size]; 8 | void init() 9 | { 10 | for(int i=0;itv_sec*1000+double(stop->tv_usec)/1000-start->tv_sec*1000-double(start->tv_usec)/1000; 53 | cout << "col_major time: " << double(durationTime) << " ms" << endl; 54 | 55 | init(); 56 | gettimeofday(start,NULL); 57 | row_major(); 58 | gettimeofday(stop,NULL); 59 | durationTime =stop->tv_sec*1000+double(stop->tv_usec)/1000-start->tv_sec*1000-double(start->tv_usec)/1000; 60 | cout << "row_major time: " << double(durationTime) << " ms" << endl; 61 | return 0; 62 | } 63 | -------------------------------------------------------------------------------- /mpi/mpi.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #pragma comment(lib,"mpi.lib") 10 | using namespace std; 11 | const int N = 20, numProcess = 10; 12 | float a[N][N]; 13 | 14 | void init() 15 | { 16 | for (int i = 0; i < N; i++) 17 | for (int j = 0; j < N; j++) 18 | a[i][j] = float(rand()) / 10; 19 | } 20 | void SequentialAlgorithm() 21 | { 22 | for (int k = 0; k < N; k++) 23 | { 24 | for (int j = k + 1; j < N; j++) 25 | a[k][j] /= a[k][k]; 26 | a[k][k] = 1.0; 27 | for (int i = k + 1; i < N; i++) { 28 | 29 | for (int j = k + 1; j < N; j++) 30 | a[i][j] -= a[i][k] * a[k][j]; 31 | a[i][k] = 0; 32 | } 33 | } 34 | } 35 | 36 | void show() 37 | { 38 | 39 | for (int i = 0; i < N; i++) { 40 | for (int j = 0; j < N; j++) 41 | cout << a[i][j] << ' '; 42 | cout << endl; 43 | } 44 | } 45 | int main(int argc, char *argv[]) 46 | { 47 | int myid; 48 | MPI_Status status; 49 | MPI_Init(0, 0); 50 | double start, end; 51 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 52 | int r1 = myid * (N / numProcess), r2 = (myid == numProcess - 1) ? N - 1 : (myid + 1)*(N / numProcess) - 1; 53 | if (myid == 0) 54 | { 55 | init(); 56 | for (int i = 1; i < numProcess; ++i) 57 | { 58 | int r11 = i * (N / numProcess), r22 = (i == numProcess - 1) ? N - 1 : (i + 1)*(N / numProcess) - 1; 59 | MPI_Send(&a[r11][0], (r22 - r11 + 1)* N, MPI_FLOAT, i, N + 1, MPI_COMM_WORLD); 60 | } 61 | } 62 | else 63 | { 64 | MPI_Recv(&a[r1][0], (r2 - r1 + 1)*N, MPI_FLOAT, 0, N + 1, MPI_COMM_WORLD, &status); 65 | } 66 | MPI_Barrier(MPI_COMM_WORLD); 67 | start = MPI_Wtime(); 68 | for (int k = 0; k < N; k++) 69 | { 70 | if (myid == 0) 71 | { 72 | for (int j = k + 1; j < N; ++j) 73 | a[k][j] /= a[k][k]; 74 | a[k][k] = 1.0; 75 | for (int j = 1; j < numProcess; ++j) 76 | MPI_Send(&a[k][0], N, MPI_FLOAT, j, k + 1, MPI_COMM_WORLD); 77 | } 78 | else 79 | MPI_Recv(&a[k][0], N, MPI_FLOAT, 0, k + 1, MPI_COMM_WORLD, &status); 80 | if (r2 >= k + 1) 81 | { 82 | for (int i = max(r1, k + 1); i <= r2; ++i) 83 | { 84 | for (int j = k + 1; j < N; ++j) 85 | { 86 | a[i][j] -= a[k][j] * a[i][k]; 87 | } 88 | a[i][k] = 0.0; 89 | if (i == k + 1 && myid != 0) 90 | MPI_Send(&a[i][0], N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD); 91 | } 92 | } 93 | if (myid == 0 && k + 1 > r2&&k + 1 < N) 94 | MPI_Recv(&a[k + 1][0], N, MPI_FLOAT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); 95 | } 96 | MPI_Barrier(MPI_COMM_WORLD); 97 | end = MPI_Wtime(); 98 | cout << "MPIAlgorithm time " << (end - start) * 1000 << " ms" << endl; 99 | MPI_Finalize(); 100 | return 0; 101 | } 102 | -------------------------------------------------------------------------------- /mpi/mpi_improved.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #pragma comment(lib,"mpi.lib") 10 | using namespace std; 11 | const int N = 20, numProcess = 9; 12 | float a[N][N]; 13 | void init() 14 | { 15 | for (int i = 0; i < N; i++) 16 | for (int j = 0; j < N; j++) 17 | a[i][j] = float(rand()) / 10; 18 | } 19 | void SequentialAlgorithm() 20 | { 21 | for (int k = 0; k < N; k++) 22 | { 23 | for (int j = k + 1; j < N; j++) 24 | a[k][j] /= a[k][k]; 25 | a[k][k] = 1.0; 26 | for (int i = k + 1; i < N; i++) { 27 | 28 | for (int j = k + 1; j < N; j++) 29 | a[i][j] -= a[i][k] * a[k][j]; 30 | a[i][k] = 0; 31 | } 32 | } 33 | } 34 | 35 | void show() 36 | { 37 | 38 | for (int i = 0; i < N; i++) { 39 | for (int j = 0; j < N; j++) 40 | cout << a[i][j] << ' '; 41 | cout << endl; 42 | } 43 | } 44 | int main(int argc, char *argv[]) 45 | { 46 | int myid; 47 | MPI_Status status; 48 | MPI_Init(NULL, NULL); 49 | double start, end; 50 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 51 | if (myid == 0) 52 | { 53 | init(); 54 | for (int i = 1; i < numProcess; ++i) 55 | for (int j = i; j < N; j += numProcess) 56 | MPI_Send(&a[j][0], N, MPI_FLOAT, i, j, MPI_COMM_WORLD); 57 | } 58 | else 59 | { 60 | for (int j = myid; j < N; j += numProcess) 61 | MPI_Recv(&a[j][0], N, MPI_FLOAT, 0, j, MPI_COMM_WORLD, &status); 62 | } 63 | MPI_Barrier(MPI_COMM_WORLD); 64 | start = MPI_Wtime(); 65 | for (int k = 0; k < N; ++k) 66 | { 67 | //int curid = k % numProcess; 68 | if (myid == 0) 69 | { 70 | for (int j = k + 1; j < N; ++j) 71 | a[k][j] /= a[k][k]; 72 | a[k][k] = 1.0; 73 | for (int j = 1; j < numProcess; ++j) 74 | { 75 | MPI_Send(&a[k][0], N, MPI_FLOAT, j, j, MPI_COMM_WORLD); 76 | } 77 | } 78 | else 79 | MPI_Recv(&a[k][0], N, MPI_FLOAT, 0, myid, MPI_COMM_WORLD, &status); 80 | int r2 = myid; 81 | while (r2 < k + 1)r2 += numProcess; 82 | for (int i = r2; i < N; i += numProcess) 83 | { 84 | for (int j = k + 1; j < N; ++j) 85 | a[i][j] -= a[k][j] * a[i][k]; 86 | a[i][k] = 0.0; 87 | } 88 | if ((k + 1) % numProcess == myid && myid != 0) 89 | MPI_Send(&a[k + 1][0], N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD); 90 | if (myid == 0 && (k + 1)%numProcess !=0&&k + 1 < N) 91 | MPI_Recv(&a[k + 1][0], N, MPI_FLOAT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); 92 | } 93 | end = MPI_Wtime(); 94 | if (myid == 0) { 95 | //show(); 96 | cout << "MPIAlgorithm time " << (end - start) * 1000 << " ms" << endl; 97 | } 98 | MPI_Finalize(); 99 | return 0; 100 | } 101 | -------------------------------------------------------------------------------- /mpi/mpi_omp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #pragma comment(lib,"mpi.lib") 11 | using namespace std; 12 | const int N = 2000, numProcess = 9; 13 | float a[N][N]; 14 | void init() 15 | { 16 | for (int i = 0; i < N; i++) 17 | for (int j = 0; j < N; j++) 18 | a[i][j] = float(rand()) / 10; 19 | } 20 | void SequentialAlgorithm() 21 | { 22 | for (int k = 0; k < N; k++) 23 | { 24 | for (int j = k + 1; j < N; j++) 25 | a[k][j] /= a[k][k]; 26 | a[k][k] = 1.0; 27 | for (int i = k + 1; i < N; i++) { 28 | 29 | for (int j = k + 1; j < N; j++) 30 | a[i][j] -= a[i][k] * a[k][j]; 31 | a[i][k] = 0; 32 | } 33 | } 34 | } 35 | 36 | void show() 37 | { 38 | 39 | for (int i = 0; i < N; i++) { 40 | for (int j = 0; j < N; j++) 41 | cout << a[i][j] << ' '; 42 | cout << endl; 43 | } 44 | } 45 | int main(int argc, char *argv[]) 46 | { 47 | int myid; 48 | MPI_Status status; 49 | MPI_Init(NULL, NULL); 50 | double start, end; 51 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 52 | if (myid == 0) 53 | { 54 | init(); 55 | for (int i = 1; i < numProcess; ++i) 56 | for (int j = i; j < N; j += numProcess) 57 | MPI_Send(&a[j][0], N, MPI_FLOAT, i, j, MPI_COMM_WORLD); 58 | } 59 | else 60 | { 61 | for (int j = myid; j < N; j += numProcess) 62 | MPI_Recv(&a[j][0], N, MPI_FLOAT, 0, j, MPI_COMM_WORLD, &status); 63 | } 64 | MPI_Barrier(MPI_COMM_WORLD); 65 | start = MPI_Wtime(); 66 | int i, j, k, r2; 67 | #pragma omp parallel num_threads(threadCount),private(i,j,k,r2) 68 | for (k = 0; k < N; ++k) 69 | { 70 | #pragma omp single 71 | { 72 | if (myid == 0) 73 | { 74 | for (j = k + 1; j < N; ++j) 75 | a[k][j] /= a[k][k]; 76 | a[k][k] = 1.0; 77 | for (j = 1; j < numProcess; ++j) 78 | { 79 | MPI_Send(&a[k][0], N, MPI_FLOAT, j, j, MPI_COMM_WORLD); 80 | } 81 | } 82 | else 83 | MPI_Recv(&a[k][0], N, MPI_FLOAT, 0, myid, MPI_COMM_WORLD, &status); 84 | r2 = myid; 85 | while (r2 < k + 1)r2 += numProcess; 86 | } 87 | #pragma omp for 88 | for (i = r2; i < N; i += numProcess) 89 | { 90 | for (j = k + 1; j < N; ++j) 91 | a[i][j] -= a[k][j] * a[i][k]; 92 | a[i][k] = 0.0; 93 | } 94 | #pragma omp single 95 | { 96 | if ((k + 1) % numProcess == myid && myid != 0) 97 | MPI_Send(&a[k + 1][0], N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD); 98 | if (myid == 0 && (k + 1) % numProcess != 0 && k + 1 < N) 99 | MPI_Recv(&a[k + 1][0], N, MPI_FLOAT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); 100 | } 101 | 102 | } 103 | end = MPI_Wtime(); 104 | if (myid == 0) { 105 | show(); 106 | cout << "MPIAlgorithm time " << (end - start) * 1000 << " ms" << endl; 107 | } 108 | MPI_Finalize(); 109 | return 0; 110 | } -------------------------------------------------------------------------------- /mpi/mpi_omp_simd.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #pragma comment(lib,"mpi.lib") 9 | using namespace std; 10 | const int N = 2000, numProcess = 6, threadCount = 6; 11 | float a[N][N]; 12 | void init() 13 | { 14 | for (int i = 0; i < N; i++) 15 | for (int j = 0; j < N; j++) 16 | a[i][j] = float(rand()) / 10; 17 | } 18 | void SequentialAlgorithm() 19 | { 20 | for (int k = 0; k < N; k++) 21 | { 22 | for (int j = k + 1; j < N; j++) 23 | a[k][j] /= a[k][k]; 24 | a[k][k] = 1.0; 25 | for (int i = k + 1; i < N; i++) { 26 | 27 | for (int j = k + 1; j < N; j++) 28 | a[i][j] -= a[i][k] * a[k][j]; 29 | a[i][k] = 0; 30 | } 31 | } 32 | } 33 | 34 | void show() 35 | { 36 | 37 | for (int i = 0; i < N; i++) { 38 | for (int j = 0; j < N; j++) 39 | printf("%f ", a[i][j]); 40 | printf("\n"); 41 | } 42 | } 43 | int main(int argc, char *argv[]) 44 | { 45 | int myid; 46 | MPI_Status status; 47 | MPI_Init(NULL, NULL); 48 | double start, end; 49 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 50 | if (myid == 0) 51 | { 52 | init(); 53 | for (int i = 1; i < numProcess; ++i) 54 | for (int j = i; j < N; j += numProcess) 55 | MPI_Send(&a[j][0], N, MPI_FLOAT, i, j, MPI_COMM_WORLD); 56 | } 57 | else 58 | { 59 | for (int j = myid; j < N; j += numProcess) 60 | MPI_Recv(&a[j][0], N, MPI_FLOAT, 0, j, MPI_COMM_WORLD, &status); 61 | } 62 | MPI_Barrier(MPI_COMM_WORLD); 63 | start = MPI_Wtime(); 64 | __m128 t0, t1, t2, t3; 65 | int i, j, k, r2; 66 | float temp1[4], temp2[4]; 67 | #pragma omp parallel num_threads(threadCount),private(i,j,k,r2,t0,t1,t2,t3,temp1,temp2) 68 | for (k = 0; k < N; ++k) 69 | { 70 | #pragma omp single 71 | { 72 | if (myid == 0) 73 | { 74 | temp1[0] = temp1[1] = temp1[2] = temp1[3] = a[k][k]; 75 | t0 = _mm_loadu_ps(temp1); 76 | for (j = k + 1; j+3 < N; j+=4) 77 | { 78 | t1 = _mm_loadu_ps(a[k] + j); 79 | t2 = _mm_div_ps(t1, t0); 80 | _mm_storeu_ps(a[k] + j, t2); 81 | } 82 | for (; j < N; j++) 83 | a[k][j] /= a[k][k]; 84 | a[k][k] = 1.0; 85 | for (j = 1; j < numProcess; ++j) 86 | { 87 | MPI_Send(&a[k][0], N, MPI_FLOAT, j, j, MPI_COMM_WORLD); 88 | } 89 | } 90 | else 91 | MPI_Recv(&a[k][0], N, MPI_FLOAT, 0, myid, MPI_COMM_WORLD, &status); 92 | r2 = myid; 93 | while (r2 < k + 1)r2 += numProcess; 94 | } 95 | #pragma omp for 96 | for (i = r2; i < N; i += numProcess) 97 | { 98 | temp2[0] = temp2[1] = temp2[2] = temp2[3] = a[k][k]; 99 | t0 = _mm_loadu_ps(temp2); 100 | for (j = k + 1; j + 3 < N; j += 4) 101 | { 102 | t1 = _mm_loadu_ps(a[k] + j); 103 | t2 = _mm_loadu_ps(a[i] + j); 104 | t3 = _mm_mul_ps(t0, t1); 105 | t2 = _mm_sub_ps(t2, t3); 106 | _mm_storeu_ps(a[i] + j, t2); 107 | } 108 | for (; j < N; j++) 109 | a[i][j] -= a[i][k] * a[k][j]; 110 | a[i][k] = 0.0; 111 | } 112 | #pragma omp single 113 | { 114 | if ((k + 1) % numProcess == myid && myid != 0) 115 | MPI_Send(&a[k + 1][0], N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD); 116 | if (myid == 0 && (k + 1) % numProcess != 0 && k + 1 < N) 117 | MPI_Recv(&a[k + 1][0], N, MPI_FLOAT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); 118 | } 119 | 120 | } 121 | end = MPI_Wtime(); 122 | if (myid == 0) { 123 | //show(); 124 | printf("MPIAlgorithm time %f ms", (end - start) * 1000); 125 | } 126 | MPI_Finalize(); 127 | return 0; 128 | } 129 | -------------------------------------------------------------------------------- /mpi/mpi_pipeline.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #pragma comment(lib,"mpi.lib") 10 | using namespace std; 11 | const int N = 2000, numProcess = 6; 12 | float a[N][N]; 13 | void init() 14 | { 15 | for (int i = 0; i < N; i++) 16 | for (int j = 0; j < N; j++) 17 | a[i][j] = float(rand()) / 10; 18 | } 19 | void SequentialAlgorithm() 20 | { 21 | for (int k = 0; k < N; k++) 22 | { 23 | for (int j = k + 1; j < N; j++) 24 | a[k][j] /= a[k][k]; 25 | a[k][k] = 1.0; 26 | for (int i = k + 1; i < N; i++) { 27 | 28 | for (int j = k + 1; j < N; j++) 29 | a[i][j] -= a[i][k] * a[k][j]; 30 | a[i][k] = 0; 31 | } 32 | } 33 | } 34 | 35 | void show() 36 | { 37 | 38 | for (int i = 0; i < N; i++) { 39 | for (int j = 0; j < N; j++) 40 | cout << a[i][j] << ' '; 41 | cout << endl; 42 | } 43 | } 44 | int main(int argc, char *argv[]) 45 | { 46 | int myid; 47 | MPI_Status status; 48 | MPI_Init(NULL, NULL); 49 | double start, end; 50 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 51 | if (myid == 0) 52 | { 53 | init(); 54 | for (int i = 1; i < numProcess; ++i) 55 | for (int j = i; j < N; j += numProcess) 56 | MPI_Send(&a[j][0], N, MPI_FLOAT, i, j, MPI_COMM_WORLD); 57 | } 58 | else 59 | { 60 | for (int j = myid; j < N; j += numProcess) 61 | MPI_Recv(&a[j][0], N, MPI_FLOAT, 0, j, MPI_COMM_WORLD, &status); 62 | } 63 | 64 | MPI_Barrier(MPI_COMM_WORLD); 65 | start = MPI_Wtime(); 66 | for (int k = 0; k < N; ++k) 67 | { 68 | //int curid = k % numProcess; 69 | if (myid == 0) 70 | { 71 | 72 | for (int j = k + 1; j < N; ++j) 73 | a[k][j] /= a[k][k]; 74 | a[k][k] = 1.0; 75 | MPI_Send(&a[k][0], N, MPI_FLOAT, myid+1, 0, MPI_COMM_WORLD); 76 | //cout << myid << "send" << k << endl; 77 | } 78 | else if (myid == numProcess - 1) 79 | { 80 | MPI_Recv(&a[k][0], N, MPI_FLOAT, myid - 1, 0, MPI_COMM_WORLD, &status); 81 | //cout << myid << "recieve" << k << endl; 82 | } 83 | else 84 | { 85 | MPI_Recv(&a[k][0], N, MPI_FLOAT, myid - 1, 0, MPI_COMM_WORLD, &status); 86 | //cout << myid << "recieve" << k << endl; 87 | //cout << myid << "send" << k << endl; 88 | MPI_Send(&a[k][0], N, MPI_FLOAT, myid + 1, 0, MPI_COMM_WORLD); 89 | } 90 | int r2 = myid; 91 | while (r2 < k + 1)r2 += numProcess; 92 | for (int i = r2; i < N; i += numProcess) 93 | { 94 | for (int j = k + 1; j < N; ++j) 95 | a[i][j] -= a[k][j] * a[i][k]; 96 | a[i][k] = 0.0; 97 | } 98 | if ((k + 1) % numProcess == myid && myid != 0) 99 | MPI_Send(&a[k + 1][0], N, MPI_FLOAT, 0, 0, MPI_COMM_WORLD); 100 | if (myid == 0 && (k + 1) % numProcess != 0 && k + 1 < N) 101 | MPI_Recv(&a[k + 1][0], N, MPI_FLOAT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); 102 | } 103 | end = MPI_Wtime(); 104 | if (myid == 0) { 105 | //show(); 106 | cout << "MPIAlgorithm time " << (end - start) * 1000 << " ms" << endl; 107 | } 108 | MPI_Finalize(); 109 | return 0; 110 | } -------------------------------------------------------------------------------- /mpi/mpi_specialGauss.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #pragma comment(lib,"mpi.lib") 13 | using namespace std; 14 | const int maxN = 30000;//采用位图存储,消元子和消元行的个数之和 15 | const int maxM = 23500;//矩阵列数 16 | const int numProcess = 6; 17 | int R[maxN][maxM / 32 + 1];//消元子 18 | int E[maxN][maxM / 32 + 1];//被消元行 19 | int Rpos[maxM];//Rpos[i]表示首项为i的消元行在R中第Rpos行 20 | int Epos[maxN];//Epos[i]表示E第i行首项的位置 21 | int sumR, sumE, sumEtemp; 22 | inline void setBit(int& a, int pos, int flag)//置位为flag 23 | { 24 | if (flag) 25 | a |= (1 << pos); 26 | else 27 | a &= ~(1 << pos); 28 | } 29 | inline int getBit(int& a, int pos) 30 | { 31 | return (a >> pos) & 1; 32 | } 33 | void RInput() 34 | {//读入消元子 35 | ifstream infile("消元子.txt"); 36 | int temp; 37 | bool newLine = true; 38 | for (int i = 0; i < maxM; i++) 39 | Rpos[i] = -1; 40 | while (infile >> temp) 41 | { 42 | if (newLine) 43 | { 44 | Rpos[temp] = sumR; 45 | newLine = false; 46 | } 47 | int pos1, pos2; 48 | pos1 = temp / 32; 49 | pos2 = temp % 32; 50 | setBit(R[sumR][pos1], pos2, 1); 51 | infile.get(); 52 | if (infile.peek() == '\n') 53 | { 54 | infile.get(); 55 | sumR++; 56 | newLine = true; 57 | } 58 | } 59 | infile.close(); 60 | } 61 | void EInput() 62 | {//读入被消元行 63 | int temp; 64 | bool newLine = true; 65 | ifstream infile("被消元行.txt"); 66 | while (infile >> temp) 67 | { 68 | if (newLine) 69 | { 70 | Epos[sumE] = temp; 71 | newLine = false; 72 | } 73 | int pos1, pos2; 74 | pos1 = temp / 32; 75 | pos2 = temp % 32; 76 | setBit(E[sumE][pos1], pos2, 1); 77 | infile.get(); 78 | if (infile.peek() == '\n') 79 | { 80 | infile.get(); 81 | sumE++; 82 | newLine = true; 83 | } 84 | } 85 | sumEtemp = sumE; 86 | infile.close(); 87 | } 88 | void Output() 89 | {//将结果输出到result.txt中 90 | ofstream outfile("result.txt"); 91 | outfile << "************被消元行*************" << endl; 92 | for (int i = 0; i < sumEtemp; i++) 93 | { 94 | if (!E[i][0] && Epos[i] <= 31) 95 | continue;//空行跳过 96 | 97 | for (int d = Epos[i] / 32; d >= 0; d--) 98 | { 99 | for (int j = 31; j >= 0; j--) 100 | { 101 | if (getBit(E[i][d], j)) 102 | outfile << j + d * 32 << ' '; 103 | } 104 | } 105 | outfile << endl; 106 | } 107 | } 108 | int main(int argc,char* argv[]) 109 | { 110 | 111 | int myid; 112 | MPI_Status status; 113 | MPI_Init(NULL, NULL); 114 | double start, end; 115 | MPI_Comm_rank(MPI_COMM_WORLD, &myid); 116 | int colSize = maxM / 32 + 1; 117 | EInput(); 118 | RInput(); 119 | MPI_Barrier(MPI_COMM_WORLD); 120 | start = MPI_Wtime(); 121 | if(myid==0)//主从式 122 | for (int i = 0; i < sumEtemp; i++) 123 | { 124 | while ((E[i][0] || Epos[i] > 31) && sumE > 0) 125 | { 126 | if (Rpos[Epos[i]] != -1) 127 | { 128 | int newEpos = -1, d = Epos[i] / 32; 129 | for (int id = 1; id < numProcess; i++) 130 | { 131 | MPI_Send(&i, 1, MPI_INT, id, 0, MPI_COMM_WORLD);//发送行号 132 | MPI_Send(&d, 1, MPI_INT, id, 1, MPI_COMM_WORLD);//发送任务量 133 | MPI_Send(&E[i][0], colSize, MPI_INT, id, 2, MPI_COMM_WORLD); 134 | } 135 | for (int id = 1; id < numProcess; id++) 136 | { 137 | int processMax; 138 | MPI_Recv(&processMax, 1, MPI_INT, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD, &status); 139 | newEpos = max(processMax, newEpos); 140 | int s = d / (numProcess - 1)*(id - 1);//start end 141 | int e = (id == numProcess - 1) ? d : d / (numProcess - 1)*id; 142 | MPI_Recv(&E[i][s], e - s + 1, MPI_INT, id, 1, MPI_COMM_WORLD, &status); 143 | } 144 | Epos[i] = newEpos; 145 | } 146 | else 147 | { 148 | Rpos[Epos[i]] = sumR; 149 | memcpy(R[sumR], E[i], sizeof(R[sumR]));//E[i]升级为消元子 150 | sumR++; sumE--; 151 | break; 152 | } 153 | } 154 | } 155 | else 156 | while (1) 157 | { 158 | int turn, d; 159 | MPI_Recv(&turn, 1, MPI_INT, 0, 0, MPI_COMM_WORLD, &status); 160 | MPI_Recv(&d, 1, MPI_INT, 0, 1, MPI_COMM_WORLD, &status); 161 | MPI_Recv(&E[turn], colSize, MPI_INT, 0, 2, MPI_COMM_WORLD, &status); 162 | int s = d / (numProcess - 1)*(myid - 1);//start end 163 | int e = (myid == numProcess - 1) ? d : d / (numProcess - 1)*myid; 164 | for (int j = e - 1; j >= s; j++) 165 | { 166 | E[turn][j] ^= R[Rpos[Epos[turn]]][j]; 167 | } 168 | int newpos = -1; 169 | for (int j = e - 1; j >= s; j--) 170 | { 171 | if (E[turn][j] == 0)continue; 172 | for(int k=31;k>=0;k--) 173 | if (getBit(E[turn][j], k)) 174 | { 175 | newpos = 32 * j + k; 176 | break; 177 | } 178 | if (newpos != -1)break; 179 | } 180 | MPI_Send(&newpos, 1, MPI_INT, 0, 0, MPI_COMM_WORLD); 181 | MPI_Send(&E[turn][s], e - s + 1, MPI_INT, 0, 1, MPI_COMM_WORLD); 182 | } 183 | MPI_Barrier(MPI_COMM_WORLD); 184 | end = MPI_Wtime(); 185 | if (myid == 0) 186 | { 187 | Output(); 188 | cout << "MPIAlgorithm time " << (end - start) * 1000 << " ms" << endl; 189 | } 190 | MPI_Finalize(); 191 | return 0; 192 | } -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | parallel programming homework1 2 | parallel programming SIMD homework 3 | --------------------------------------------------------------------------------