├── .gitignore ├── .idea ├── .name ├── dictionaries │ └── zhuohan123.xml ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── openmp-for-python-2.7.iml └── workspace.xml ├── README.md ├── adds-out.py ├── adds.py ├── cpu_count.txt ├── lib ├── Core.cpp ├── Core.h ├── OMP.cpp ├── OMP.h ├── PY.cpp ├── PY.h ├── Toolkit.cpp ├── Toolkit.h ├── cpu_count.py ├── delete_empty_lines.py ├── main.cpp ├── main.h ├── makefile ├── parse_omp.py ├── parseprint.py ├── parseprint.pyc ├── transform └── tree.py ├── omp.py ├── other-examples-out.py ├── other-examples.py ├── pyomp ├── std_test ├── MatrixMultiple-out.py ├── MatrixMultiple.py ├── adds-out.py ├── adds.py ├── pi-out.py ├── pi.py ├── test_barrier-out.py ├── test_barrier.py ├── test_for_critical_nowait-out.py ├── test_for_critical_nowait.py ├── test_for_reduce_dynamic-out.py ├── test_for_reduce_dynamic.py ├── test_haswait-out.py ├── test_haswait.py ├── test_nowait-out.py ├── test_nowait.py ├── test_parallel-out.py ├── test_parallel.py ├── test_sections-out.py ├── test_sections.py ├── test_variable-out.py └── test_variable.py ├── tests ├── MatrixMultiple.py ├── adds.py ├── list_test.py ├── omp_lib_test.py ├── other-examples.py ├── pi.py ├── test_program.py ├── test_program_1.py ├── test_program_2.py ├── test_program_out.py └── threading_test.py └── tmp ├── log_code.txt ├── log_omp.txt └── no_blank_line.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | 103 | # idea 104 | .idea/ 105 |
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------

# OpenMP for Python

黎才华, 李卓翰

## How to run

1. Enter the `lib` directory and build with the `make` command.
2. In the top-level directory, for any Python program `x.py`, use the command below to generate the corresponding translated program `y.py`.
3. Then run `y.py` from the top-level directory with Jython; the test suite is in `std_test`.

~~~bash
./pyomp x.py y.py
~~~

## Problem description

### The state of multithreading in Python

It is well known that multithreaded parallelism in Python does not work well. One reason is the GIL (Global Interpreter Lock), a concept introduced by CPython, the reference implementation of the Python interpreter. Because CPython's memory management is not thread-safe, CPython uses a global semaphore to force that at any moment only one thread is running in the interpreter. Python's multithreading (`import threading`) is therefore pseudo-multithreading, and multithreaded code can in fact run slower than single-threaded code. Although this is a problem of only one Python interpreter (other implementations such as Jython do not have it), CPython is so widely used that the problem is serious in practice.
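The effect is easy to observe. The following self-contained snippet (an illustrative benchmark added here, not part of this project) runs the same CPU-bound loop sequentially and on two threads; under CPython the threaded version is typically no faster, and often slower:

~~~python
# Illustrative benchmark (not part of this repository): under CPython, two
# threads running a CPU-bound loop are no faster than one thread, because
# the GIL serializes bytecode execution.
import threading
import time

def burn(n):
    s = 0
    for i in xrange(n):   # pure Python work; the GIL is never released for long
        s += i

N = 10 ** 7

t0 = time.time()
burn(N)
burn(N)
print "sequential: %.2fs" % (time.time() - t0)

t0 = time.time()
threads = [threading.Thread(target=burn, args=(N,)) for _ in range(2)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print "2 threads : %.2fs" % (time.time() - t0)
~~~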
### OpenMP

OpenMP is a compiler-directive approach to parallel programming. It uses a shared-memory model, supports C/C++/Fortran, is simple to use, and supports several parallel programming patterns. In its implementation, OpenMP first translates the source code into the corresponding Pthreads code, which is then compiled by an ordinary compiler.

### Our problem

Our goal is to implement multithreaded OpenMP-style parallelism for Python, such that a program with directives added can still be interpreted and executed even without OpenMP. That is, we want to transform the code

~~~python
#omp parallel num_threads(8)
print "i love bianyishixi"
#omp parallel end
~~~

into

~~~python
def parallel_module():
    print "i love bianyishixi"

threads = []
for i in range(8):
    threads.append(threading.Thread(target=parallel_module))
    threads[i].start()
~~~

and then hand the transformed program to a non-CPython Python interpreter (such as Jython) for execution.

## Implementation

### Directive summary

~~~python
#omp parallel [end] [num_threads(n)] [private(v1, v2, …)]
#omp for [nowait] [reduction(op : v1, v2, …)] [schedule(dynamic/static)]
#omp sections [end]
#omp section [end]
#omp critical [end]
#omp barrier

omp.get_thread_num()
omp.get_num_threads()
omp.set_num_threads(n)
~~~

### The parallel directive

The first half of the project revolved around parallelizing the `parallel` directive. We exploit the fact that Python allows a function to be defined inside another function: we define a new function in place at the code block that needs to be parallelized, and then append the corresponding calls into the `threading` library. We use

~~~python
#omp parallel
#omp parallel end
~~~

to mark the beginning and the end of a parallel block respectively, so the code

~~~python
def f():
    print "i write dazuoye"
    #omp parallel num_threads(8)
    print "i love bianyishixi"
    #omp parallel end
~~~

is translated into

~~~python
def f():
    print "i write dazuoye"
    def parallel_module():
        print "i love bianyishixi"
    threads = []
    for i in range(8):
        threads.append(threading.Thread(target=parallel_module))
    for i in range(8):
        threads[i].start()
~~~

This, however, raises a problem: a variable assigned inside the newly defined inner function becomes a local variable of that new function. If we want to modify a local variable of the original function, we must find a way to refer to the original variable; a naive rewrite therefore changes variable scopes.
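The pitfall fits in a few lines. In this sketch (added for illustration; `f_dict` is a hypothetical name), plain assignment in the inner function creates a new local variable, while mutating a shared dictionary does not:

~~~python
# Assignment inside g() creates a *new* local `a`; f's `a` is untouched.
def f():
    a = 2
    def g():
        a = 1        # new local in g, does not modify f's `a`
    g()
    return a

# Mutating a shared dict works: the inner function only *loads* `d`.
def f_dict():
    d = {'a': 2}
    def g():
        d['a'] = 1   # mutates the shared object, visible in f_dict
    g()
    return d['a']

print f(), f_dict()  # prints: 2 1
~~~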
Because Python objects are divided into mutable and immutable ones, and a mutable object of the outer function can be modified from within the inner function, we store all local variables of a function in a mutable object (such as a dictionary), which avoids the problem above. For example,

~~~python
def f():
    a = 2
    #omp parallel num_threads(8)
    print a
    a = 1
    #omp parallel end
~~~

can be translated into

~~~python
def f():
    dic_f['a'] = 2
    #omp parallel num_threads(8)
    print dic_f['a']
    dic_f['a'] = 1
    #omp parallel end
~~~

Doing this, however, means we need the precise scope of every variable in the original program, so we parse the whole program and build its AST.

Since a parallel block may have local variables of its own (not shared between threads), we implemented the `private` clause to specify which variables are private, with the following form:

~~~python
#omp parallel private(x, y, z, ...)
~~~

### The for directive

For `for` statements, we handle loops over `range`/`xrange` separately from loops over general lists. For a `range`/`xrange` loop such as

~~~python
#omp for
for i in xrange(b, e, s):
    dosth
~~~

we implemented the corresponding `prange(b, e, s)` in our library, which automatically derives from the thread number the `xrange` of the subinterval that thread has to execute; the code is translated into

~~~python
for i in omp.prange(b, e, s):
    dosth
~~~

Then, if the loop range is

~~~python
[0, 1, 2, 3]
~~~

and two threads execute it, they will respectively execute

~~~python
0: [0, 2]
1: [1, 3]
~~~
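One way to realize this round-robin split (a sketch under our assumptions, not the actual code of the `omp` module): thread `t` of `T` simply strides through the range with step `s*T`:

~~~python
# Sketch of the round-robin split behind prange (illustrative only):
# thread t of T takes b+t*s, b+(t+T)*s, b+(t+2T)*s, ...
import omp

def prange_sketch(b, e=None, s=1):
    if e is None:            # mirror range()'s one-argument form
        b, e = 0, b
    t = omp.get_thread_num()
    T = omp.get_num_threads()
    return xrange(b + t * s, e, s * T)
~~~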
For loops over a list, we implemented the corresponding `plist` function, which lets each thread filter out and execute its own part of the sequence. For example, the loop

~~~python
#omp for
for i in ['x', 'y', 'z', 'w']:
    dosth
~~~

is translated into

~~~python
for i in omp.plist(['x', 'y', 'z', 'w']):
    dosth
~~~

and when two threads execute it, they will respectively execute

~~~python
0: ['x', 'z']
1: ['y', 'w']
~~~

Besides the static scheduling above, we also support the following dynamic scheduling:

~~~python
#omp for schedule(dynamic)
for i in xrange(n):
    dosth
~~~

In this case, the threads execute the loop iterations dynamically: whenever a thread finishes a task, it picks up the earliest iteration that has not yet been executed.
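A sketch of how such a dynamic dispenser can work (an illustration under our assumptions, not the project's actual `drange`): iterations are handed out one at a time from a lock-protected counter shared by all threads:

~~~python
# Illustrative dynamic scheduler: each thread repeatedly claims the next
# unclaimed iteration index until the range is exhausted.
import threading

class DynamicRangeSketch(object):
    def __init__(self, n):
        self.n = n
        self.next = 0
        self.lock = threading.Lock()

    def __iter__(self):
        while True:
            self.lock.acquire()
            i = self.next          # claim the earliest unexecuted iteration
            self.next += 1
            self.lock.release()
            if i >= self.n:
                return             # nothing left for this thread
            yield i
~~~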
Because of the particular structure of loops, we also added a reduction operation:

~~~python
#omp for reduction(+: s)
for i in xrange(n):
    s += i
~~~

It keeps each thread's `s` independent of the others and performs a single reduction step at the end. Concretely, the code above is translated into

~~~python
for i in xrange(n):
    tmp_s += i
#omp critical
s += tmp_s
#omp critical end
~~~

Our reduction supports essentially all arithmetic operators, specifically:

~~~python
+, -, *, max, min, &, |, ^, and, or
~~~
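In the generated code, these two steps appear as `omp.reduction_init` and `omp.reduction` (see the compiled examples in the test section below). A plausible shape for the two helpers, with identity elements that are our assumption rather than quoted from the `omp` module:

~~~python
# Hedged sketch of the two reduction helpers used by the generated code;
# the identity elements below are assumptions, not the module's actual values.
def reduction_init(op):
    return {'+': 0, '-': 0, '*': 1, '&': -1, '|': 0, '^': 0,
            'max': float('-inf'), 'min': float('inf'),
            'and': True, 'or': False}[op]

def reduction(op, x, y):
    # combine a thread-local partial result y into the shared value x
    if op in ('+', '-'):
        return x + y          # OpenMP-style: '-' also combines partials by addition
    if op == '*':
        return x * y
    if op == 'max':
        return max(x, y)
    if op == 'min':
        return min(x, y)
    if op == '&':
        return x & y
    if op == '|':
        return x | y
    if op == '^':
        return x ^ y
    if op == 'and':
        return x and y
    return x or y             # 'or'
~~~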
### The sections directive

For a construct such as

~~~python
#omp sections
#omp section
A
#omp section end
#omp section
B
#omp section end
#omp sections end
~~~

the translated program makes two threads execute the code segments A and B respectively.

We translate `sections` into a `for` loop and let the `for` directive do the parallelization; that is, the construct above is translated into

~~~python
#omp for
for i in range(2):
    if i==0:
        A
    elif i==1:
        B
~~~

### The critical directive

The `critical` directive ensures that a statement is executed by at most one thread at a time; it is implemented with lock and unlock operations. While translating the source program, every new `critical` directive is assigned a fresh global lock; the lock is acquired before the code segment and released after it. That is, the code

~~~python
#omp critical
x
#omp critical end
~~~

is translated into

~~~python
omp.set_lock()
x
omp.unset_lock()
~~~

### The barrier directive

The `barrier` directive has the form

~~~python
#omp barrier
~~~

It makes all faster threads wait at that point until the slower threads reach it, and only then do all threads proceed together with the statements that follow.

It is implemented with three semaphores:

~~~python
class _Barrier:
    def __init__(self, n):
        self.n = n
        self.count = 0
        self.mutex = threading.Semaphore(1)
        self.barrier = threading.Semaphore(0)
        self.barrier2 = threading.Semaphore(1)

    def wait(self):
        self.mutex.acquire()
        self.count += 1
        if self.count == self.n:
            self.barrier2.acquire()
            self.barrier.release()
        self.mutex.release()

        self.barrier.acquire()
        self.barrier.release()

        self.mutex.acquire()
        self.count -= 1
        if self.count == 0:
            self.barrier.acquire()
            self.barrier2.release()
        self.mutex.release()

        self.barrier2.acquire()
        self.barrier2.release()
~~~
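The two turnstiles (`barrier` and `barrier2`) make the barrier reusable: the second one prevents a fast thread from racing ahead into the next `wait` before everyone has left the current one. A small usage sketch (assuming the class above has been pasted in and `threading` imported; in the project this is hidden behind `omp.barrier()`):

~~~python
# Usage sketch for the reusable barrier above (illustrative only).
import threading, time, random

barrier = _Barrier(4)

def worker(tid):
    time.sleep(random.random())
    print "%d before barrier" % tid
    barrier.wait()                 # no thread passes until all 4 arrive
    print "%d after barrier" % tid

threads = [threading.Thread(target=worker, args=(t,)) for t in range(4)]
for t in threads:
    t.start()
for t in threads:
    t.join()
~~~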
In addition, `for` and `sections` both have an implicit barrier at their end by default; the barrier of `for` can be removed explicitly with the `nowait` clause.

## Test suite design

We designed two classes of test suites for different goals:

1. The first class targets the individual OpenMP directives. We first check by hand that the programs before and after compilation are equivalent, and then run them and check that the output is reasonable, in order to verify the compiler's correctness.
2. The second class focuses mainly on the `for` directive. We apply OpenMP for Python to the summation problem and to the pi-computation problem, the two most classic OpenMP applications, varying the number of threads and observing program performance to show that our compiler is effective.

Note: equivalence and correctness here mean that the compiler follows the OpenMP directives and that the result of running the program matches the programmer's intent. They do not mean the two programs produce exactly the same output before and after compilation, which would be neither reasonable nor possible.

### The first class of tests

The first class of tests focuses on exhaustively checking the correctness of all supported directives; the aim is to show with the simplest possible examples that our transformations are correct. For each test we therefore give the code before and after compilation and compare the program outputs. Performance is not considered here; it is covered by the second class of tests.

#### 0. Variable

This test contains no OpenMP directive at all, but whether and how variables get replaced is crucial. The basic rules are: global variables are not replaced; local variables are put into a dictionary and replaced; variables that sit between local and global are resolved from the inside out following the logic of Python 3's `nonlocal` statement and then replaced; special local variables, such as those inside lambdas and comprehensions, and locals declared `private`, are not replaced.

Before compilation:

~~~python
import omp

class MyClass:
    i = 12345
    def f(self):
        return 'hello world'
tmp = MyClass()
print tmp.f()

c = 1
f = 2
def func(a,b,*d,**e):
    global c,f
    return a+b+c+f
print func(3,4,None,None)

add2 = lambda x,y:x+y
print add2(1,2)

l = [2*i for i in range(10) if i>0]
print l

a = 4
def f():
    a = 2
    def g():
        b = a
        return b
    return g()
print f()
~~~

After compilation:

~~~python
import omp
omp.set_num_of_internal_locks(0)
class MyClass:
    i = 12345
    def f(self):
        _dict1={}
        _dict1['self']=self
        return 'hello world'
tmp = MyClass()
print tmp.f()
c = 1
f = 2
def func(a,b,*d,**e):
    _dict2={}
    _dict2['a']=a
    _dict2['b']=b
    _dict2['d']=d
    _dict2['e']=e
    global c,f
    return _dict2['a']+_dict2['b']+c+f
print func(3,4,None,None)
add2 = lambda x,y:x+y
print add2(1,2)
l = [2*i for i in range(10) if i>0]
print l
a = 4
def f():
    _dict3={}
    _dict3['a'] = 2
    def g():
        _dict4={}
        _dict4['b'] = _dict3['a']
        return _dict4['b']
    return g()
print f()
pass
~~~

Output comparison:

Before compilation:

~~~
hello world
10
3
[2, 4, 6, 8, 10, 12, 14, 16, 18]
2
~~~

After compilation:

~~~
hello world
10
3
[2, 4, 6, 8, 10, 12, 14, 16, 18]
2
~~~

#### 1. Parallel

`parallel` is the most basic directive. As described above, it turns the parallel block into a newly defined function and calls it multiple times via `threading` at the end of the block (wrapped in `omp.parallel_run`) to achieve multithreading. This directive shows most directly that multiple threads are used.

Before compilation:

~~~python
import omp

def hello_world():
    print "i write dazuoye!!!"
    print omp.get_thread_num(),'/',omp.get_num_threads()
    a = 2017
    #omp parallel num_threads(4)
    print "i love bianyishixi!"
    print omp.get_thread_num(),'/',omp.get_num_threads()
    print "a =", a
    #omp parallel end

hello_world()
~~~

After compilation:

~~~python
import omp
omp.set_num_of_internal_locks(0)
def hello_world():
    _dict1={}
    print "i write dazuoye!!!"
    print omp.get_thread_num(),'/',omp.get_num_threads()
    _dict1['a'] = 2017
    #omp parallel num_threads(4)
    def _block0():
        print "i love bianyishixi!"
        print omp.get_thread_num(),'/',omp.get_num_threads()
        print "a =", _dict1['a']
    #omp parallel end
    omp.parallel_run(_block0,4)
hello_world()
pass
~~~

Output comparison:

Before compilation:

~~~
i write dazuoye!!!
0 / 1
i love bianyishixi!
0 / 1
a = 2017
~~~

After compilation:

~~~
i write dazuoye!!!
0 / 1
i love bianyishixi!
0 / 4
a = 2017
i love bianyishixi!
1 / 4
a = 2017
i love bianyishixi!
2 / 4
a = 2017
i love bianyishixi!
3 / 4
a = 2017
~~~
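The generated code relies on `omp.parallel_run(block, n)` to fan the block out over `n` threads. A hedged sketch of what it plausibly does (the real helper in the `omp` module must additionally maintain the per-thread numbering behind `omp.get_thread_num()`; the fallback used when `n` is 0 is our assumption):

~~~python
# Hedged sketch of omp.parallel_run; illustrative, not the project's code.
import threading
from multiprocessing import cpu_count

def parallel_run_sketch(block, num_threads):
    if num_threads <= 0:
        num_threads = cpu_count()   # assumed default when no num_threads(n) is given
    threads = [threading.Thread(target=block) for _ in range(num_threads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()                    # the parallel block joins before the code continues
~~~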
#### 2. Sections

The `sections` directive is implemented with the `for` directive introduced above combined with `if` statements; no function has to be defined in place, and the one-to-one correspondence with the source code is simple and clear. Note the `omp.barrier()` added at the end by default (introduced above).

Before compilation:

~~~python
import omp

def sections_test():
    # omp parallel num_threads(2)
    # omp sections
    # omp section
    print 'section 0 from ' + str(omp.get_thread_num()) + '\n',
    # omp section end
    # omp section
    print 'section 1 from ' + str(omp.get_thread_num()) + '\n',
    # omp section end
    # omp sections end
    # omp parallel end

sections_test()
~~~

After compilation:

~~~python
import omp
omp.set_num_of_internal_locks(0)
def sections_test():
    _dict1={}
    # omp parallel num_threads(2)
    def _block0():
        # omp sections
        for OMP_SECTIONS_ID in omp.prange(2):
            # omp section
            if OMP_SECTIONS_ID == 0:
                print 'section 0 from ' + str(omp.get_thread_num()) + '\n',
            # omp section end
            # omp section
            if OMP_SECTIONS_ID == 1:
                print 'section 1 from ' + str(omp.get_thread_num()) + '\n',
            # omp section end
        # omp sections end
        omp.barrier()
    # omp parallel end
    omp.parallel_run(_block0,2)
sections_test()
pass
~~~

Output comparison:

Before compilation:

~~~
section 0 from 0
section 1 from 0
~~~

After compilation:

~~~
section 0 from 0
section 1 from 1
~~~

#### 3. For + reduction + dynamic

The for+reduction combination is the most commonly used construct, and also the one that best demonstrates parallel speedup. The key points are how the iterations of a `for` loop are distributed over the threads (using plist/dlist/prange/drange) and the locking required by the reduction. This also needs a little cooperation from the variable-replacement strategy: a new temporary variable holds each thread's partial accumulation, and merging the partial results at the end relies on variable replacement as well.

Before compilation:

~~~python
import omp

num_step = 1000000
step = 1.0 / num_step

def calc_pi_for():
    ans = 0
    # omp parallel num_threads(8) private(i,x)
    # omp for reduction(+:ans) schedule(dynamic)
    for i in range(num_step):
        x = (i + 0.5) * step
        ans += 4.0 / (1.0 + x * x)
    # omp parallel end
    print ans * step

calc_pi_for()
~~~

After compilation:

~~~python
import omp
omp.set_num_of_internal_locks(1)
num_step = 1000000
step = 1.0 / num_step
def calc_pi_for():
    _dict1={}
    _dict1['ans'] = 0
    # omp parallel num_threads(8) private(i,x)
    def _block0():
        # omp for reduction(+:ans) schedule(dynamic)
        OMP_REDUCTION_VAR_0_0 = omp.reduction_init('+')
        for i in omp.drange(num_step):
            x = (i + 0.5) * step
            OMP_REDUCTION_VAR_0_0 += 4.0 / (1.0 + x * x)
        omp.set_internal_lock(0)
        _dict1['ans'] = omp.reduction('+',_dict1['ans'],OMP_REDUCTION_VAR_0_0)
        omp.unset_internal_lock(0)
        omp.barrier()
    # omp parallel end
    omp.parallel_run(_block0,8)
    print _dict1['ans'] * step
calc_pi_for()
pass
~~~

Output comparison:

Before compilation:

~~~
3.14159265359
~~~

After compilation:

~~~
3.14159265359
~~~

#### 4. For + critical + nowait

Although, on the same pi-computation problem, using reduction reduces the number of lock operations and therefore beats `critical` (which locks and unlocks on every accumulation), this test only demonstrates what `critical` does; the focus is correctness, and performance is not considered here.

Before compilation:

~~~python
import omp

num_step = 1000000
step = 1.0 / num_step

def calc_pi_critical():
    ans = 0
    # omp parallel num_threads(8) private(i,x)
    # omp for nowait
    for i in range(num_step):
        x = (i + 0.5) * step
        # omp critical
        ans += 4.0 / (1.0 + x * x)
        # omp critical end
    # omp parallel end
    print ans * step

calc_pi_critical()
~~~

After compilation:

~~~python
import omp
omp.set_num_of_internal_locks(2)
num_step = 1000000
step = 1.0 / num_step
def calc_pi_critical():
    _dict1={}
    _dict1['ans'] = 0
    # omp parallel num_threads(8) private(i,x)
    def _block0():
        # omp for nowait
        for i in omp.prange(num_step):
            x = (i + 0.5) * step
            # omp critical
            omp.set_internal_lock(1)
            _dict1['ans'] += 4.0 / (1.0 + x * x)
            omp.unset_internal_lock(1)
            # omp critical end
    # omp parallel end
    omp.parallel_run(_block0,8)
    print _dict1['ans'] * step
calc_pi_critical()
pass
~~~

Output comparison:

Before compilation:

~~~
3.14159265359
~~~

After compilation:

~~~
3.14159265359
~~~

#### 5. Barrier

There is no barrier between the outputs of a and b, so the a and b lines should interleave; there is a barrier before c is printed, so the faster threads wait for the slower ones and then the c lines are printed together.

Before compilation:

~~~python
import omp
import time
import random

def barrier_test():
    # omp parallel
    print str(omp.get_thread_num()) + ' a\n',
    time.sleep(random.randrange(3))
    print str(omp.get_thread_num()) + ' b\n',
    time.sleep(random.randrange(3))
    # omp barrier
    print str(omp.get_thread_num()) + ' c\n',
    # omp parallel end

barrier_test()
~~~

After compilation:

~~~python
import omp
omp.set_num_of_internal_locks(0)
import time
import random
def barrier_test():
    _dict1={}
    # omp parallel
    def _block0():
        print str(omp.get_thread_num()) + ' a\n',
        time.sleep(random.randrange(3))
        print str(omp.get_thread_num()) + ' b\n',
        time.sleep(random.randrange(3))
        # omp barrier
        omp.barrier()
        print str(omp.get_thread_num()) + ' c\n',
    # omp parallel end
    omp.parallel_run(_block0,0)
barrier_test()
pass
~~~

Output comparison:

Before compilation:

~~~
0 a
0 b
0 c
~~~

After compilation:

~~~
0 a
1 a
2 a
2 b
3 a
1 b
0 b
3 b
1 c
0 c
3 c
2 c
~~~

#### 6. Nowait

This test is special: one has to compare the outputs of the two programs, with and without `nowait`, to see the effect of `nowait`. With `nowait`, a thread that finishes early moves on to the code after the loop and prints "done" early; without `nowait`, the faster threads must wait for the slower ones.

With nowait:

~~~python
import omp
import time
import random

def nowait_test():
    # omp parallel num_threads(8) private(i)
    # omp for nowait
    for i in range(8):
        time.sleep(random.randrange(3))
        print str(omp.get_thread_num()) + ' thread\n',
    print "done"
    # omp parallel end

nowait_test()
~~~

Without nowait:

~~~python
import omp
import time
import random

def nowait_test():
    # omp parallel num_threads(8) private(i)
    # omp for
    for i in range(8):
        time.sleep(random.randrange(3))
        print str(omp.get_thread_num()) + ' thread\n',
    print "done"
    # omp parallel end

nowait_test()
~~~

Output comparison:

With nowait:

~~~
0 thread
done
1 thread
done
3 thread
done
5 thread
done
6 thread
done
2 thread
done
7 thread
done
4 thread
done
~~~

Without nowait:

~~~
2 thread
0 thread
1 thread
3 thread
4 thread
5 thread
6 thread
7 thread
done
done
done
done
done
done
done
done
~~~

#### 7. MatrixMultiple

Besides the unit tests, we also tested ordinary programs; matrix multiplication is one of them. The code is as follows:

~~~python
import omp
import random

def genMatrix(n):
    return [random.randrange(n) for i in range(n*n)]

def matrixMul(N, a, b):
    res = [0 for i in range(N*N)]
    # omp parallel num_threads(2) private(n,i,j,tmp,k)
    # omp for
    for n in range(N*N):
        i = n / N
        j = n % N
        tmp = 0
        for k in range(N):
            tmp = tmp + a[i*N+k] * b[k*N+j]
        res[n] = tmp
    # omp parallel end
    return res

n = 3
a = genMatrix(n)
b = genMatrix(n)
print a
print b
print matrixMul(n, a, b)
~~~

For larger n, our program still produces correct results.

### The second class of tests

#### 1. Summation test program

~~~python
import omp

def count(n):
    s = 0
    #omp parallel private(i) num_threads(2)
    if omp.get_thread_num()==0:
        print 'num_threads =', omp.get_num_threads()
    #omp for reduction(+:s)
    for i in xrange(n):
        s += i
    #omp parallel end
    return s
print count(500000000)
~~~

With n = 500000000, the running times under different values of num\_threads are:

| Threads | Running time |
| :-----: | :----------: |
|    1    |    20.10s    |
|    4    |    9.17s     |

#### 2. Pi test program

~~~python
import omp

num_step = 300000000
step = 1.0 / num_step

def calc_pi_for():
    ans = 0
    # omp parallel num_threads(2) private(i,x)
    # omp for reduction(+:ans)
    for i in xrange(num_step):
        x = (i + 0.5) * step
        ans += 4.0 / (1.0 + x * x)
    # omp parallel end
    print "%.10f\n" % (ans * step),

calc_pi_for()
~~~

With num\_step = 300000000, the running time varies with num\_threads as follows:

| Threads | Running time |
| :-----: | :----------: |
|    1    |    10.57s    |
|    4    |    4.99s     |

## Status and known problems

We achieved all the goals set in our project proposal, but some problems remain in the project:

- Robustness is limited; nonstandard Python code is not well supported
  - e.g., code indented with tabs
- Fundamental issues
  - The GIL problem is not solved at its root; Jython's generality, third-party library support, and efficiency do not reach CPython's level

## Acknowledgements

- Thanks to every team member for their effort and cooperation
- Thanks to the instructor and teaching assistants for their guidance
- Thanks to our classmates for their suggestions and support

-------------------------------------------------------------------------------- /adds-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(1) 3 | def count(n): 4 | _dict1={} 5 | _dict1['n']=n 6 | _dict1['s'] = 0 7 | #omp parallel private(i) num_threads(2) 8 | def _block0(): 9 | if omp.get_thread_num()==0: 10 | print 'num_threads =', omp.get_num_threads() 11 | #omp for reduction(+:s) 12 | OMP_REDUCTION_VAR_0_0 = omp.reduction_init('+') 13 | for i in omp.prange(_dict1['n']): 14 | OMP_REDUCTION_VAR_0_0 += i 15 | omp.set_internal_lock(0) 16 | _dict1['s'] = omp.reduction('+',_dict1['s'],OMP_REDUCTION_VAR_0_0) 17 | omp.unset_internal_lock(0) 18 | omp.barrier() 19 | #omp parallel end 20 | omp.parallel_run(_block0,2) 21 | return _dict1['s'] 22 | print count(500000000) 23 | pass 24 | -------------------------------------------------------------------------------- /adds.py: -------------------------------------------------------------------------------- 1 | import omp 2 | 3 | def count(n): 4 | s = 0 5 | #omp parallel private(i) num_threads(2) 6 | if omp.get_thread_num()==0: 7 | print 'num_threads =', omp.get_num_threads() 8 | #omp for reduction(+:s) 9 | for i in xrange(n): 10 | s += i 11 | #omp parallel end 12 | return s 13 | print count(500000000) 14 | -------------------------------------------------------------------------------- /cpu_count.txt: -------------------------------------------------------------------------------- 1 | 8 2 | -------------------------------------------------------------------------------- /lib/Core.cpp: -------------------------------------------------------------------------------- 1 | #include "Core.h" 2 | 3 | #include "assert.h" 4 | 5 | int findVariable(Node_t *P, string name) 6 | { 7 | while (P && P->local.find(name) == P->local.end()) 8 | P = P->father; 9 | if (P) return P->nodeId; 10 | return -1; 11 | } 12 | 13 | void changeVariable(Node_t *P) 14 | { 15 | cerr << "changeVariable " << P->name << endl; 16 | 17 | int n = P->logId.size(); 18 | int comDep = 0, lamDep = 0; 19 | set<string> lamArg; 20 | stack<int> forLine; 21 | map<string, string> forVar; 22 | int codeL = -1, span = 0; 23 | int ptrBlock = 0; 24 | for (int i = 0; i < n; ++i) 25 | { 26 | string log = Log[P->logId[i]]; 27 | string type = parseLog(log, 0); 28 | if (type == "arg" || type == "vararg" || type == "kwarg") 29 | { 30 | if (lamDep) 31 | lamArg.insert(parseLog(log, 1)); 32 | else 33 | { 34 | P->local.insert(parseLog(log, 1)); 35 | string name = parseLog(log, 1); 36 | string tmpstr = P->space + dictKeyWord + ItoS(P->nodeId) + "['" + name + "']=" + name; 37 | 
addCode[P->sLine].push_back(make_pair(0, tmpstr)); 38 | } 39 | } else 40 | if (type == "global") 41 | { 42 | P->global.insert(parseLog(log, 1)); 43 | } else 44 | if (type == "comprehension") 45 | { 46 | if (parseLog(log, 1) == "start") 47 | ++comDep; 48 | else 49 | --comDep; 50 | } else 51 | if (type == "lambda") 52 | { 53 | if (parseLog(log, 1) == "start") 54 | { 55 | ++lamDep; 56 | lamArg.clear(); 57 | } 58 | else 59 | --lamDep; 60 | } else 61 | if (type == "for") 62 | { 63 | if (parseLog(log, 1) == "begin") 64 | { 65 | int line = StoI(parseLog(log, 2)) - 1; 66 | forLine.push(line); 67 | map::iterator it = line2For.find(line); 68 | if (it != line2For.end()) 69 | { 70 | paraFor_t *ptr = it->second; 71 | int NofVar = ptr->varName.size(); 72 | for (int j = 0; j < NofVar; ++j) 73 | forVar.insert(make_pair(ptr->varName[j], "OMP_REDUCTION_VAR_" + ItoS(ptr->lockId) + "_" + ItoS(j))); 74 | } 75 | } 76 | else 77 | { 78 | int line = forLine.top(); 79 | forLine.pop(); 80 | map::iterator it = line2For.find(line); 81 | if (it != line2For.end()) 82 | { 83 | paraFor_t *ptr = it->second; 84 | int NofVar = ptr->varName.size(); 85 | for (int j = 0; j < NofVar; ++j) 86 | { 87 | forVar.erase(ptr->varName[j]); 88 | string rpStr; 89 | if (P->local.find(ptr->varName[j]) != P->local.end()) 90 | rpStr = dictKeyWord+ItoS(P->nodeId)+"['"+ptr->varName[j]+"']"; 91 | else 92 | { 93 | int id = findVariable(P, ptr->varName[j]); 94 | if (id == -1) continue; 95 | rpStr = dictKeyWord+ItoS(id)+"['"+ptr->varName[j]+"']"; 96 | } 97 | ptr->varName[j] = rpStr; 98 | } 99 | } 100 | } 101 | } else 102 | if (type == "variable") 103 | { 104 | int tmpCodeL = StoI(parseLog(log, 1)); 105 | if (tmpCodeL != codeL) 106 | { 107 | span = 0; 108 | codeL = tmpCodeL; 109 | while (ptrBlock < NofBlock && codeL > Block[ptrBlock]->tLine) 110 | ++ptrBlock; 111 | } 112 | int col = StoI(parseLog(log, 2)); 113 | string name = parseLog(log, 3); 114 | string act = parseLog(log, 4); 115 | if (forVar.find(name) != forVar.end()) 116 | { 117 | string rpStr = forVar[name]; 118 | Code[codeL].replace(col+span, name.length(), rpStr); 119 | span += rpStr.length() - name.length(); 120 | continue; 121 | } 122 | if (P->name == "global") continue; 123 | if (comDep && act == "store") continue; 124 | if (lamDep && lamArg.find(name) != lamArg.end()) continue; 125 | if (P->global.find(name) != P->global.end()) continue; 126 | if (ptrBlock < NofBlock && Block[ptrBlock]->sLine < codeL 127 | && Block[ptrBlock]->privateVar.find(name) != Block[ptrBlock]->privateVar.end()) continue; 128 | if (act == "store" || act == "del") 129 | { 130 | if (act == "store") 131 | P->local.insert(name); 132 | else 133 | P->local.erase(name); 134 | string rpStr = dictKeyWord+ItoS(P->nodeId)+"['"+name+"']"; 135 | Code[codeL].replace(col+span, name.length(), rpStr); 136 | span += rpStr.length() - name.length(); 137 | } else 138 | if (act == "load") 139 | { 140 | int id = findVariable(P, name); 141 | if (id == -1) continue; 142 | string rpStr = dictKeyWord+ItoS(id)+"['"+name+"']"; 143 | Code[codeL].replace(col+span, name.length(), rpStr); 144 | span += rpStr.length() - name.length(); 145 | } 146 | } 147 | } 148 | n = P->child.size(); 149 | for (int i = 0; i < n; ++i) 150 | changeVariable(P->child[i]); 151 | } 152 | 153 | Node_t *findBelong(Node_t *P, int line) 154 | { 155 | if (line < P->sLine || P->tLine < line) 156 | return NULL; 157 | int n = P->child.size(); 158 | for (int i = 0; i < n; ++i) 159 | { 160 | Node_t *ret = findBelong(P->child[i], line); 161 | if (ret) return ret; 162 | } 163 | 
return P; 164 | } 165 | 166 | void AddThreading(int line, string space, string funName, int numThreads) 167 | { 168 | string tmpstr; 169 | tmpstr = space + "omp.parallel_run(" + funName + "," + ItoS(numThreads) + ")"; 170 | addCode[line].push_back(make_pair(0, tmpstr)); 171 | } 172 | 173 | void OpenMPforPython() 174 | { 175 | cerr << "paralleling" << endl; 176 | for (int i = 0; i < NofBlock; ++i) 177 | { 178 | string space = ""; 179 | int tmp = countNofSpace(Code[Block[i]->sLine+1]); 180 | for (int j = 0; j < tmp; ++j) 181 | space.push_back(' '); 182 | 183 | /* parallel */ 184 | cerr << "parallel" << endl; 185 | string tmpstr = space + "def " + blockKeyWord + ItoS(i) + "():"; 186 | addCode[Block[i]->sLine].push_back(make_pair(0, tmpstr)); 187 | Node_t *father = findBelong(root, Block[i]->sLine); 188 | if (father->name != "global") 189 | for (set::iterator it = father->global.begin(); it != father->global.end(); ++it) 190 | { 191 | tmpstr = space + " " + "global " + *it; 192 | addCode[Block[i]->sLine].push_back(make_pair(0, tmpstr)); 193 | } 194 | else 195 | { 196 | // global for global 197 | } 198 | for (int j = Block[i]->sLine+1; j < Block[i]->tLine; ++j) 199 | Code[j] = " " + Code[j]; 200 | AddThreading(Block[i]->tLine, space, blockKeyWord+ItoS(i), Block[i]->numThreads); 201 | 202 | /* sections */ 203 | int NofSections = Block[i]->sections.size(); 204 | for (int id1 = 0; id1 < NofSections; ++id1) 205 | { 206 | int sLine = Block[i]->sections[id1].sLine; 207 | int NofSection = Block[i]->sections[id1].section.size(); 208 | tmpstr = space + " for OMP_SECTIONS_ID in omp.prange(" + ItoS(NofSection) + "):"; 209 | addCode[sLine].push_back(make_pair(0, tmpstr)); 210 | for (int id2 = 0; id2 < NofSection; ++id2) 211 | { 212 | int sLine = Block[i]->sections[id1].section[id2].sLine; 213 | int tLine = Block[i]->sections[id1].section[id2].tLine; 214 | string tmpstr = space + " if OMP_SECTIONS_ID == " + ItoS(id2) + ":"; 215 | addCode[sLine].push_back(make_pair(0, tmpstr)); 216 | for (int j = sLine+1; j < tLine; ++j) 217 | Code[j] = " " + Code[j]; 218 | } 219 | int tLine = Block[i]->sections[id1].tLine; 220 | if (Block[i]->sections[id1].nowait == false) 221 | { 222 | tmpstr = space + " omp.barrier()"; 223 | addCode[tLine].push_back(make_pair(1, tmpstr)); 224 | } 225 | } 226 | 227 | /* for */ 228 | int NofFor = Block[i]->paraFor.size(); 229 | for (int id = 0; id < NofFor; ++id) 230 | { 231 | int line = Block[i]->paraFor[id].sLine; 232 | int NofVar = Block[i]->paraFor[id].varName.size(); 233 | if (NofVar != 0) 234 | { 235 | for (int j = 0; j < NofVar; ++j) 236 | { 237 | tmpstr = space + " OMP_REDUCTION_VAR_" + ItoS(Block[i]->paraFor[id].lockId) + 238 | "_" + ItoS(j) + " = omp.reduction_init('" + Block[i]->paraFor[id].operCh[j] + "')"; 239 | addCode[line-1].push_back(make_pair(0, tmpstr)); 240 | } 241 | } 242 | int spos = Code[line].find(" in ") + 4; 243 | int tpos = Code[line].find(":") - 1; 244 | string subline = strip(Code[line].substr(spos, tpos-spos+1)); 245 | string str = (Block[i]->paraFor[id].mode == "dynamic")? "omp.drange" : "omp.prange"; 246 | if (begin_with(subline, "xrange(")) 247 | Code[line].replace(Code[line].find("xrange"), 6, str); 248 | else if (begin_with(subline, "range(")) 249 | Code[line].replace(Code[line].find("range"), 5, str); 250 | else 251 | { 252 | str = (Block[i]->paraFor[id].mode == "dynamic")? 
"omp.dlist(" : "omp.plist("; 253 | Code[line].insert(tpos+1, ")"); 254 | Code[line].insert(spos, str); 255 | } 256 | int tLine = Block[i]->paraFor[id].tLine; 257 | if (NofVar != 0) 258 | { 259 | for (int j = 0; j < NofVar; ++j) 260 | { 261 | tmpstr = space + " omp.set_internal_lock(" + ItoS(Block[i]->paraFor[id].lockId) + ")"; 262 | addCode[tLine].push_back(make_pair(0, tmpstr)); 263 | tmpstr = space + " " + Block[i]->paraFor[id].varName[j] + 264 | " = omp.reduction('" + Block[i]->paraFor[id].operCh[j] + "'," + 265 | Block[i]->paraFor[id].varName[j] + "," + "OMP_REDUCTION_VAR_" + 266 | ItoS(Block[i]->paraFor[id].lockId) + "_" + ItoS(j) + ")"; 267 | addCode[tLine].push_back(make_pair(1, tmpstr)); 268 | tmpstr = space + " omp.unset_internal_lock(" + ItoS(Block[i]->paraFor[id].lockId) + ")"; 269 | addCode[tLine].push_back(make_pair(2, tmpstr)); 270 | } 271 | } 272 | if (Block[i]->paraFor[id].nowait == false) 273 | { 274 | tmpstr = space + " omp.barrier()"; 275 | addCode[tLine].push_back(make_pair(3, tmpstr)); 276 | } 277 | } 278 | 279 | /* barrier */ 280 | int NofBarrier = Block[i]->barrier.size(); 281 | for (int id = 0; id < NofBarrier; ++id) 282 | { 283 | int line = Block[i]->barrier[id]; 284 | tmpstr = space + " omp.barrier()"; 285 | addCode[line].push_back(make_pair(0, tmpstr)); 286 | } 287 | 288 | /* critical */ 289 | int NofCritical = Block[i]->critical.size(); 290 | for (int id = 0; id < NofCritical; ++id) 291 | { 292 | int sLine = Block[i]->critical[id].sLine; 293 | int tLine = Block[i]->critical[id].tLine; 294 | tmpstr = space + " omp.set_internal_lock(" + ItoS(Block[i]->critical[id].lockId) + ")"; 295 | addCode[sLine].push_back(make_pair(0, tmpstr)); 296 | tmpstr = space + " omp.unset_internal_lock(" + ItoS(Block[i]->critical[id].lockId) + ")"; 297 | addCode[tLine-1].push_back(make_pair(0, tmpstr)); 298 | } 299 | } 300 | } -------------------------------------------------------------------------------- /lib/Core.h: -------------------------------------------------------------------------------- 1 | #ifndef CORE_H 2 | #define CORE_H 3 | 4 | #include 5 | #include 6 | 7 | #include "Toolkit.h" 8 | #include "OMP.h" 9 | #include "PY.h" 10 | 11 | void changeVariable(Node_t*); 12 | Node_t *findBelong(Node_t*, int); 13 | void AddThreading(int, string, string, int); 14 | void OpenMPforPython(); 15 | 16 | extern map line2For; 17 | extern vector< vector > > addCode; 18 | extern vector Block; 19 | extern int NofBlock; 20 | 21 | #endif -------------------------------------------------------------------------------- /lib/OMP.cpp: -------------------------------------------------------------------------------- 1 | #include "OMP.h" 2 | #include "Toolkit.h" 3 | 4 | paraFor_t::paraFor_t(int _lineId) 5 | { 6 | lineId = _lineId; 7 | sLine = _lineId+1; 8 | lockId = NofLock++; 9 | operCh.clear(); 10 | varName.clear(); 11 | nowait = false; 12 | mode = ""; 13 | } 14 | 15 | Critical_t::Critical_t(int _sLine) 16 | { 17 | lockId = NofLock++; 18 | sLine = _sLine; 19 | tLine = 0; 20 | } 21 | 22 | Section_t::Section_t(int _sLine) 23 | { 24 | sLine = _sLine; 25 | } 26 | 27 | Sections_t::Sections_t(int _sLine) 28 | { 29 | section.clear(); 30 | sLine = _sLine; 31 | tLine = 0; 32 | nowait = false; 33 | } 34 | 35 | Block_t::Block_t(int _sLine) 36 | { 37 | numThreads = 0; 38 | sLine = _sLine; 39 | tLine = 0; 40 | paraFor.clear(); 41 | critical.clear(); 42 | sections.clear(); 43 | barrier.clear(); 44 | privateVar.clear(); 45 | } 46 | 47 | void InputOMP(string OMP_log) 48 | { 49 | cerr << "Input OMP" << endl; 50 | 51 | 
Block_t *tmpBlock = NULL; 52 | ifstream fin(OMP_log.c_str()); 53 | int ForOrSect = 0; 54 | string line; 55 | while (!fin.eof()) 56 | { 57 | getline(fin, line); 58 | if (line == "") continue; 59 | string type = parseLog(line, 0); 60 | if (type == "parallel") 61 | { 62 | if (parseLog(line, 2) == "begin") 63 | tmpBlock = new Block_t(StoI(parseLog(line, 1))); 64 | else 65 | { 66 | tmpBlock->tLine = StoI(parseLog(line, 1)); 67 | Block.push_back(tmpBlock); 68 | tmpBlock = NULL; 69 | } 70 | } else 71 | { 72 | if (!tmpBlock) continue; 73 | if (type == "private_var") 74 | tmpBlock->privateVar.insert(parseLog(line, 1)); 75 | else if (type == "num_threads") 76 | tmpBlock->numThreads = StoI(parseLog(line, 1)); 77 | else if (type == "nowait") 78 | { 79 | if (parseLog(line, 1) == "False") continue; 80 | if (ForOrSect == 1) 81 | { 82 | int id = tmpBlock->paraFor.size()-1; 83 | tmpBlock->paraFor[id].nowait = true; 84 | } 85 | else if (ForOrSect == 2) 86 | { 87 | int id = tmpBlock->sections.size()-1; 88 | tmpBlock->sections[id].nowait = true; 89 | } 90 | } 91 | else if (type == "barrier") 92 | tmpBlock->barrier.push_back(StoI(parseLog(line, 1))); 93 | else if (type == "for") 94 | { 95 | ForOrSect = 1; 96 | int lineId = StoI(parseLog(line, 1)); 97 | tmpBlock->paraFor.push_back(paraFor_t(lineId)); 98 | int tmp = tmpBlock->paraFor.size() - 1; 99 | line2For.insert(make_pair(lineId, &tmpBlock->paraFor[tmp])); 100 | } 101 | else if (type == "reduction") 102 | { 103 | int id = tmpBlock->paraFor.size()-1; 104 | tmpBlock->paraFor[id].operCh.push_back(parseLog(line, 1)); 105 | tmpBlock->paraFor[id].varName.push_back(parseLog(line, 2)); 106 | } 107 | else if (type == "scheduling_type") 108 | { 109 | int id = tmpBlock->paraFor.size()-1; 110 | tmpBlock->paraFor[id].mode = parseLog(line, 1); 111 | } 112 | else if (type == "critical") 113 | { 114 | if (parseLog(line, 2) == "begin") 115 | tmpBlock->critical.push_back(Critical_t(StoI(parseLog(line, 1)))); 116 | else 117 | { 118 | int id = tmpBlock->critical.size()-1; 119 | tmpBlock->critical[id].tLine = StoI(parseLog(line, 1)); 120 | } 121 | } 122 | else if (type == "sections") 123 | { 124 | ForOrSect = 2; 125 | if (parseLog(line, 2) == "begin") 126 | tmpBlock->sections.push_back(Sections_t(StoI(parseLog(line, 1)))); 127 | else 128 | { 129 | int id = tmpBlock->sections.size()-1; 130 | tmpBlock->sections[id].tLine = StoI(parseLog(line, 1)); 131 | } 132 | } 133 | else if (type == "section") 134 | { 135 | int id1 = tmpBlock->sections.size()-1; 136 | if (parseLog(line, 2) == "begin") 137 | tmpBlock->sections[id1].section.push_back(Section_t(StoI(parseLog(line, 1)))); 138 | else 139 | { 140 | int id2 = tmpBlock->sections[id1].section.size()-1; 141 | tmpBlock->sections[id1].section[id2].tLine = StoI(parseLog(line, 1)); 142 | } 143 | } 144 | } 145 | } 146 | NofBlock = Block.size(); 147 | cerr << "NofBlock = " << NofBlock << endl; 148 | 149 | fin.close(); 150 | } -------------------------------------------------------------------------------- /lib/OMP.h: -------------------------------------------------------------------------------- 1 | #ifndef OMP_H 2 | #define OMP_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | using namespace std; 12 | 13 | struct paraFor_t 14 | { 15 | int lineId; 16 | int sLine, tLine; 17 | int lockId; 18 | vector operCh; // for reduction 19 | vector varName; 20 | string mode; // for static/dynamic 21 | bool nowait; 22 | paraFor_t(int _lindId); 23 | }; 24 | 25 | struct Critical_t 26 | { 27 | int lockId; 28 | int 
sLine, tLine; 29 | Critical_t(int _sLine); 30 | }; 31 | 32 | struct Section_t 33 | { 34 | int sLine, tLine; 35 | Section_t(int _sLine); 36 | }; 37 | 38 | struct Sections_t 39 | { 40 | int sLine, tLine; 41 | bool nowait; 42 | vector section; 43 | Sections_t(int _sLine); 44 | }; 45 | 46 | struct Block_t 47 | { 48 | int numThreads; 49 | int sLine, tLine; 50 | vector barrier; 51 | set privateVar; 52 | vector paraFor; 53 | vector critical; 54 | vector sections; 55 | Block_t(int _sLine); 56 | }; 57 | 58 | void InputOMP(string OMP_log); 59 | 60 | extern int NofLock; 61 | extern int NofBlock; 62 | extern vector Block; 63 | extern map line2For; 64 | 65 | #endif -------------------------------------------------------------------------------- /lib/PY.cpp: -------------------------------------------------------------------------------- 1 | #include "PY.h" 2 | #include "Toolkit.h" 3 | 4 | Node_t::Node_t(string _name, int _sLine, Node_t *_father) 5 | { 6 | cerr << "new Node " << _name << endl; 7 | nodeId = NofNode++; 8 | name = _name; 9 | sLine = _sLine; 10 | father = _father; 11 | if (name == "global") 12 | { 13 | tLine = NofCode; 14 | space = ""; 15 | } 16 | logId.clear(); 17 | child.clear(); 18 | local.clear(); 19 | global.clear(); 20 | } 21 | 22 | int Node_t::build(int sLogL) 23 | { 24 | while (sLogL < NofLog) 25 | { 26 | while (sLogL < NofLog && parseLog(Log[sLogL], 0) != "function") 27 | logId.push_back(sLogL++); 28 | if (sLogL == NofLog) break; 29 | if (parseLog(Log[sLogL], 1) == "end") 30 | { 31 | tLine = StoI(parseLog(Log[sLogL], 3)); 32 | int tmp = INF; 33 | for (int i = sLine+1; i < tLine; ++i) 34 | tmp = min(tmp, countNofSpace(Code[i])); 35 | space = ""; 36 | for (int j = 0; j < tmp; ++j) 37 | space.push_back(' '); 38 | string tmpstr = space + dictKeyWord + ItoS(nodeId) + "={}"; 39 | addCode[sLine].push_back(make_pair(0, tmpstr)); 40 | ++sLogL; break; 41 | } 42 | else 43 | { 44 | Node_t *ch = new Node_t(parseLog(Log[sLogL], 2), StoI(parseLog(Log[sLogL], 3)), this); 45 | sLogL = ch->build(sLogL+1); 46 | child.push_back(ch); 47 | } 48 | } 49 | return sLogL; 50 | } 51 | 52 | void InputPY(string PY_code, string PY_log) 53 | { 54 | cerr << "Input PY" << endl; 55 | 56 | string line; 57 | ifstream fin(PY_code.c_str()); 58 | Code.push_back(""); 59 | while (!fin.eof()) 60 | { 61 | getline(fin, line); 62 | Code.push_back(line); 63 | } 64 | NofCode = Code.size(); 65 | fin.close(); 66 | 67 | stack forLine; 68 | fin.open(PY_log.c_str()); 69 | while (!fin.eof()) 70 | { 71 | getline(fin, line); 72 | Log.push_back(line); 73 | if (parseLog(line, 0) == "for") 74 | { 75 | if (parseLog(line, 1) == "begin") 76 | forLine.push(StoI(parseLog(line, 2)) - 1); 77 | else 78 | { 79 | int lineId = forLine.top(); 80 | forLine.pop(); 81 | map::iterator it = line2For.find(lineId); 82 | if (it != line2For.end()) 83 | { 84 | paraFor_t *ptr = line2For[lineId]; 85 | ptr->tLine = StoI(parseLog(line, 2)) - 1; 86 | } 87 | } 88 | } 89 | } 90 | NofLog = Log.size(); 91 | fin.close(); 92 | 93 | cerr << "NofCode = " << NofCode << endl; 94 | cerr << "NofLog = " << NofLog << endl; 95 | 96 | addCode.resize(NofCode); 97 | for (int i = 0; i < NofCode; ++i) 98 | { 99 | string line = strip(Code[i]); 100 | if (begin_with(line, "import ") && end_with(line, " omp")) 101 | { 102 | string tmpstr = "omp.set_num_of_internal_locks(" + ItoS(NofLock) + ")"; 103 | addCode[i].push_back(make_pair(0, tmpstr)); 104 | break; 105 | } 106 | } 107 | root = new Node_t("global", 0, NULL); 108 | root->build(0); 109 | } 
-------------------------------------------------------------------------------- /lib/PY.h: -------------------------------------------------------------------------------- 1 | #ifndef PY_H 2 | #define PY_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #include "OMP.h" 13 | #include "Toolkit.h" 14 | 15 | using namespace std; 16 | 17 | struct Node_t 18 | { 19 | int nodeId; 20 | string name, space; 21 | int sLine, tLine; 22 | vector logId; 23 | vector child; 24 | set local; 25 | set global; 26 | Node_t *father; 27 | 28 | Node_t(string _name, int _sLine, Node_t *_father); 29 | int build(int sLogL); 30 | }; 31 | 32 | void InputPY(string PY_code, string PY_log); 33 | 34 | extern int NofLog; 35 | extern int NofCode; 36 | extern int NofNode; 37 | extern int NofLock; 38 | extern vector Log; 39 | extern vector Code; 40 | extern vector< vector > > addCode; 41 | extern struct Node_t *root; 42 | extern map line2For; 43 | 44 | #endif -------------------------------------------------------------------------------- /lib/Toolkit.cpp: -------------------------------------------------------------------------------- 1 | #include "Toolkit.h" 2 | 3 | string ItoS(int x) 4 | { 5 | string y; 6 | stringstream f; 7 | f << x; f >> y; 8 | return y; 9 | } 10 | 11 | int StoI(string x) 12 | { 13 | int y; 14 | stringstream f; 15 | f << x; f >> y; 16 | return y; 17 | } 18 | 19 | string strip(string s) 20 | { 21 | int l = 0, r = s.length()-1; 22 | while (l < r && (s[l] == ' ' || s[l] == '\t')) ++l; 23 | while (l < r && (s[r] == ' ' || s[r] == '\t')) --r; 24 | return s.substr(l, r-l+1); 25 | } 26 | 27 | bool begin_with(string s, string sub) 28 | { 29 | int la = s.length(); 30 | int lb = sub.length(); 31 | if (la < lb) return false; 32 | for (int i = 0; i < lb; ++i) 33 | if (s[i] != sub[i]) return false; 34 | return true; 35 | } 36 | 37 | bool end_with(string s, string sub) 38 | { 39 | int la = s.length(); 40 | int lb = sub.length(); 41 | if (la < lb) return false; 42 | for (int i = 1; i <= lb; ++i) 43 | if (s[la-i] != sub[lb-i]) return false; 44 | return true; 45 | } 46 | 47 | int countNofSpace(string code) 48 | { 49 | int n = 0; 50 | while (code[n] == ' ' || code[n] == '\t') 51 | ++n; 52 | return n; 53 | } 54 | 55 | string parseLog(string log, int k) 56 | { 57 | log.push_back(' '); 58 | for (int i = 0; i < k; ++i) 59 | log.erase(0, log.find(" ")+1); 60 | return log.substr(0, log.find(" ")); 61 | } 62 | 63 | void output(string OUT_code) 64 | { 65 | cerr << "output()" << endl; 66 | 67 | ofstream fout(OUT_code.c_str()); 68 | for (int i = 1; i < NofCode; ++i) 69 | { 70 | fout << Code[i] << endl; 71 | int n = addCode[i].size(); 72 | for (int j = 0; j < n; ++j) 73 | for (int k = j+1; k < n; ++k) 74 | if (addCode[i][j].first > addCode[i][k].first) 75 | swap(addCode[i][j], addCode[i][k]); 76 | for (int j = 0; j < n; ++j) 77 | fout << addCode[i][j].second << endl; 78 | } 79 | fout.close(); 80 | } -------------------------------------------------------------------------------- /lib/Toolkit.h: -------------------------------------------------------------------------------- 1 | #ifndef TOOLKIT_H 2 | #define TOOLKIT_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std; 11 | 12 | const int INF = (1<<30)-1; 13 | 14 | const string dictKeyWord = "_dict"; 15 | const string blockKeyWord = "_block"; 16 | 17 | string ItoS(int x); 18 | int StoI(string x); 19 | string strip(string s); 20 | bool begin_with(string s, string sub); 21 | bool 
end_with(string s, string sub); 22 | int countNofSpace(string code); 23 | string parseLog(string log, int k); 24 | void output(string); 25 | 26 | extern vector< vector > > addCode; 27 | extern vector Code; 28 | extern int NofCode; 29 | 30 | #endif -------------------------------------------------------------------------------- /lib/cpu_count.py: -------------------------------------------------------------------------------- 1 | from multiprocessing import cpu_count 2 | 3 | print cpu_count() -------------------------------------------------------------------------------- /lib/delete_empty_lines.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | if len(sys.argv)!=2: 4 | raise ValueError('File name needed!') 5 | 6 | file = open(sys.argv[1]) 7 | for line in file: 8 | if len(line.lstrip().rstrip()) != 0: 9 | if line[-1] == '\n': 10 | print line, 11 | else: 12 | print line 13 | 14 | print 'pass' -------------------------------------------------------------------------------- /lib/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "OMP.h" 4 | #include "PY.h" 5 | #include "Core.h" 6 | 7 | int NofBlock = 0; 8 | int NofLock = 0; 9 | int NofLog = 0; 10 | int NofCode = 0; 11 | int NofNode = 0; 12 | 13 | string PY_code, PY_log, OMP_log, OUT_code; 14 | vector< vector > > addCode; 15 | vector Code, Log; 16 | vector Block; 17 | struct Node_t *root; 18 | 19 | map line2For; 20 | 21 | void initialize() 22 | { 23 | addCode.clear(); 24 | Code.clear(); 25 | Log.clear(); 26 | Block.clear(); 27 | root = NULL; 28 | line2For.clear(); 29 | } 30 | 31 | void ParseArgv(int argc, char **argv) 32 | { 33 | if (argc != 5) 34 | { 35 | fprintf(stderr, "Usage: %s PY_code PY_log OMP_log OUT_code\n", argv[0]); 36 | exit(-1); 37 | } 38 | PY_code = (string)argv[1]; 39 | PY_log = (string)argv[2]; 40 | OMP_log = (string)argv[3]; 41 | OUT_code = (string)argv[4]; 42 | } 43 | 44 | int main(int argc, char **argv) 45 | { 46 | initialize(); 47 | ParseArgv(argc, argv); 48 | InputOMP(OMP_log); 49 | InputPY(PY_code, PY_log); 50 | changeVariable(root); 51 | OpenMPforPython(); 52 | output(OUT_code); 53 | 54 | return 0; 55 | } -------------------------------------------------------------------------------- /lib/main.h: -------------------------------------------------------------------------------- 1 | #ifndef MAIN_H 2 | #define MAIN_H 3 | 4 | 5 | #endif -------------------------------------------------------------------------------- /lib/makefile: -------------------------------------------------------------------------------- 1 | transform: Toolkit.cpp OMP.cpp PY.cpp Core.cpp main.cpp 2 | g++ Toolkit.cpp OMP.cpp PY.cpp Core.cpp main.cpp -o transform 3 | clean: 4 | rm transform 5 | -------------------------------------------------------------------------------- /lib/parse_omp.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | def parse_args(line): 4 | args = [] 5 | line = line.lstrip() 6 | split_line = line.split() 7 | if (len(split_line)>0 and split_line[0] == '#omp') \ 8 | or (len(split_line)>1 and split_line[0] == '#' and split_line[1] == 'omp'): 9 | args.append('#omp') 10 | spos = line.find('omp') 11 | line = line[spos+3:] 12 | pcnt = 0 13 | tstr = '' 14 | for ch in line: 15 | if ch == ' ' or ch == '\t' or ch =='\n' or ch == '\r': 16 | if pcnt == 0: 17 | if tstr != '': 18 | args.append(tstr) 19 | tstr = '' 20 | else: 21 | if ch=='(': 22 | pcnt+=1 23 | elif 
ch==')': 24 | pcnt-=1 25 | tstr += ch 26 | if tstr != '': 27 | args.append(tstr) 28 | else: 29 | args.append('not omp') 30 | return args 31 | 32 | if len(sys.argv)!=2: 33 | raise ValueError('File name needed!') 34 | 35 | file = open(sys.argv[1]) 36 | 37 | lineno = 0 38 | for line in file: 39 | lineno += 1 40 | args = parse_args(line) 41 | #print args 42 | n_of_args = len(args) 43 | if n_of_args > 0 and args[0] == '#omp': 44 | if n_of_args > 1: 45 | if args[1] == 'parallel': 46 | print 'parallel', lineno, 47 | if n_of_args > 2 and args[2] == 'end': 48 | print 'end', 49 | begin = False 50 | else: 51 | print 'begin', 52 | begin = True 53 | print 54 | 55 | if begin: 56 | num_threads = 0 57 | private_vars = [] 58 | for arg in args: 59 | if arg.startswith('num_threads('): 60 | start = arg.find('(')+1 61 | end = arg.find(')') 62 | num_threads = int(arg[start:end]) 63 | if arg.startswith('private('): 64 | start = arg.find('(')+1 65 | end = arg.find(')') 66 | vars = arg[start:end].split(',') 67 | vars = [var.strip() for var in vars] 68 | private_vars += vars 69 | print 'num_threads', num_threads 70 | for var in private_vars: 71 | print 'private_var', var 72 | elif args[1] == 'for': 73 | print 'for', lineno 74 | scheduling_type = 'static' 75 | nowait = False 76 | for arg in args: 77 | if arg.startswith('reduction('): 78 | start = arg.find('(') + 1 79 | end = arg.find(')') 80 | op, vars = arg[start:end].split(':') 81 | op = op.strip() 82 | vars = vars.split(',') 83 | vars = [var.strip() for var in vars] 84 | for var in vars: 85 | print 'reduction', op, var 86 | if arg.startswith('schedule('): 87 | start = arg.find('(') + 1 88 | end = arg.find(')') 89 | scheduling_type = arg[start:end].strip() 90 | if arg == 'nowait': 91 | nowait = True 92 | print 'nowait', nowait 93 | print 'scheduling_type', scheduling_type 94 | elif args[1] == 'critical': 95 | print 'critical', lineno, 96 | if n_of_args > 2 and args[2] == 'end': 97 | print 'end', 98 | else: 99 | print 'begin', 100 | print 101 | elif args[1] == 'barrier': 102 | print 'barrier', lineno 103 | elif args[1] == 'sections': 104 | print 'sections', lineno, 105 | if n_of_args > 2 and args[2] == 'end': 106 | print 'end', 107 | else: 108 | print 'begin', 109 | print 110 | elif args[1] == 'section': 111 | print 'section', lineno, 112 | if n_of_args > 2 and args[2] == 'end': 113 | print 'end', 114 | else: 115 | print 'begin', 116 | print 117 | 118 | 119 | else: 120 | print 'warning: no arguments after "#omp"' -------------------------------------------------------------------------------- /lib/parseprint.py: -------------------------------------------------------------------------------- 1 | from ast import * 2 | 3 | def dump(node, annotate_fields=True, include_attributes=False, indent=' '): 4 | """ 5 | Return a formatted dump of the tree in *node*. This is mainly useful for 6 | debugging purposes. The returned string will show the names and the values 7 | for fields. This makes the code impossible to evaluate, so if evaluation is 8 | wanted *annotate_fields* must be set to False. Attributes such as line 9 | numbers and column offsets are not dumped by default. If this is wanted, 10 | *include_attributes* can be set to True. 
11 | """ 12 | def _format(node, level=0): 13 | if isinstance(node, AST): 14 | fields = [(a, _format(b, level)) for a, b in iter_fields(node)] 15 | if include_attributes and node._attributes: 16 | fields.extend([(a, _format(getattr(node, a), level)) 17 | for a in node._attributes]) 18 | return ''.join([ 19 | node.__class__.__name__, 20 | '(', 21 | ', '.join(('%s=%s' % field for field in fields) 22 | if annotate_fields else 23 | (b for a, b in fields)), 24 | ')']) 25 | elif isinstance(node, list): 26 | lines = ['['] 27 | lines.extend((indent * (level + 2) + _format(x, level + 2) + ',' 28 | for x in node)) 29 | if len(lines) > 1: 30 | lines.append(indent * (level + 1) + ']') 31 | else: 32 | lines[-1] += ']' 33 | return '\n'.join(lines) 34 | return repr(node) 35 | 36 | if not isinstance(node, AST): 37 | raise TypeError('expected AST, got %r' % node.__class__.__name__) 38 | return _format(node) 39 | 40 | def parseprint(code, filename="", mode="exec", **kwargs): 41 | """Parse some code from a string and pretty-print it.""" 42 | node = parse(code, mode=mode) # An ode to the code 43 | print dump(node, **kwargs) 44 | 45 | -------------------------------------------------------------------------------- /lib/parseprint.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuohan123/openmp-for-python/22959cd07d1fd8c45799b7a71ae5495453708134/lib/parseprint.pyc -------------------------------------------------------------------------------- /lib/transform: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhuohan123/openmp-for-python/22959cd07d1fd8c45799b7a71ae5495453708134/lib/transform -------------------------------------------------------------------------------- /lib/tree.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import parseprint 3 | import sys 4 | 5 | if len(sys.argv)!=2: 6 | raise ValueError('File name needed!') 7 | 8 | file = open(sys.argv[1]) 9 | original_code=file.read() 10 | 11 | class variable_checker(ast.NodeVisitor): 12 | def visit(self, node): 13 | ast.NodeVisitor.visit(self,node) 14 | 15 | def visit_FunctionDef(self, node): 16 | print 'function begin', node.name, node.lineno 17 | self.generic_visit(node) 18 | 19 | lastBody = node.body[-1] 20 | while 'body' in lastBody._attributes: 21 | lastBody = lastBody.body[-1] 22 | lastLine = lastBody.lineno 23 | print 'function end', node.name, lastLine+1 24 | 25 | def visit_arguments(self, node): 26 | self.generic_visit(node) 27 | if node.vararg!=None: 28 | print 'vararg',node.vararg 29 | if node.kwarg != None: 30 | print 'kwarg', node.kwarg 31 | 32 | def visit_Global(self, node): 33 | for name in node.names: 34 | print 'global',name 35 | self.generic_visit(node) 36 | 37 | def visit_Name(self, node): 38 | var_state = 'unknown' 39 | if isinstance(node.ctx, ast.Store): 40 | var_state = 'store' 41 | elif isinstance(node.ctx, ast.Load): 42 | var_state = 'load' 43 | elif isinstance(node.ctx, ast.Del): 44 | var_state = 'del' 45 | elif isinstance(node.ctx, ast.Param): 46 | var_state = 'param' 47 | if var_state == 'param': 48 | print 'arg',node.id 49 | else: 50 | print 'variable',node.lineno, node.col_offset, node.id, var_state 51 | self.generic_visit(node) 52 | 53 | def visit_comprehension(self, node): 54 | print 'comprehension start' 55 | self.generic_visit(node) 56 | print 'comprehension end' 57 | 58 | def visit_Lambda(self, node): 59 | print 'lambda start' 60 | 
self.generic_visit(node) 61 | print 'lambda end' 62 | 63 | def visit_For(self, node): 64 | print 'for begin', node.lineno 65 | self.generic_visit(node) 66 | lastBody = node.body[-1] 67 | while 'body' in lastBody._fields: # descend into nested trailing blocks 68 | lastBody = lastBody.body[-1] 69 | lastLine = lastBody.lineno 70 | print 'for end', lastLine+1 71 | 72 | tree = ast.parse(original_code) 73 | 74 | variable_checker().visit(tree) 75 | 76 | #parseprint.parseprint(tree, include_attributes=True) 77 | -------------------------------------------------------------------------------- /omp.py: -------------------------------------------------------------------------------- 1 | import threading 2 | 3 | with open('cpu_count.txt', 'r') as _f: 4 | _cpu_count = int(_f.read()) 5 | 6 | def cpu_count(): 7 | return _cpu_count 8 | 9 | _num_threads = 1 10 | 11 | 12 | def get_thread_num(): 13 | thread_name = threading.current_thread().getName() 14 | if thread_name == 'MainThread': 15 | return 0 16 | else: 17 | return int(thread_name) 18 | 19 | 20 | def get_num_threads(): 21 | global _num_threads 22 | return _num_threads 23 | 24 | 25 | def set_num_threads(n): 26 | global _num_threads 27 | _num_threads = n 28 | 29 | 30 | # helper functions for for statement 31 | def prange(start, stop=None, step=1): 32 | if stop is None: 33 | stop = start 34 | start = 0 35 | id = get_thread_num() 36 | nthrds = get_num_threads() 37 | if step == 0: 38 | raise ValueError('step can not be zero.') 39 | return xrange(start + id * step, stop, nthrds * step) # static schedule: thread id takes id, id+nthrds, id+2*nthrds, ... 40 | 41 | 42 | def plist(iter_list): 43 | l = list(iter_list) 44 | id = get_thread_num() 45 | nthrds = get_num_threads() 46 | return l[id::nthrds] 47 | 48 | # lock functions 49 | def init_lock(): 50 | return threading.Lock() 51 | 52 | 53 | def set_lock(lock): 54 | lock.acquire() 55 | 56 | 57 | def unset_lock(lock): 58 | lock.release() 59 | 60 | 61 | # helper functions for critical statement 62 | _internal_locks = [] 63 | 64 | 65 | def set_num_of_internal_locks(num): 66 | global _internal_locks 67 | _internal_locks = [init_lock() for _ in xrange(num)] 68 | 69 | 70 | def set_internal_lock(lock_id): 71 | global _internal_locks 72 | set_lock(_internal_locks[lock_id]) 73 | 74 | 75 | def unset_internal_lock(lock_id): 76 | global _internal_locks 77 | unset_lock(_internal_locks[lock_id]) 78 | 79 | 80 | # helper functions for reduction statement 81 | def reduction_init(op): 82 | if op == '+': 83 | return 0 84 | elif op == '-': 85 | return 0 86 | elif op == '*': 87 | return 1 88 | elif op == 'max': 89 | return float('-inf') 90 | elif op == 'min': 91 | return float('inf') 92 | elif op == '&': 93 | return -1 94 | elif op == '|': 95 | return 0 96 | elif op == '^': 97 | return 0 98 | elif op == 'and': 99 | return True 100 | elif op == 'or': 101 | return False 102 | 103 | 104 | def reduction(op, tot, num): 105 | if op == '+': 106 | return tot + num 107 | elif op == '-': 108 | return tot - num 109 | elif op == '*': 110 | return tot * num 111 | elif op == 'max': 112 | return max(tot, num) 113 | elif op == 'min': 114 | return min(tot, num) 115 | elif op == '&': 116 | return tot & num 117 | elif op == '|': 118 | return tot | num 119 | elif op == '^': 120 | return tot ^ num 121 | elif op == 'and': 122 | return tot and num 123 | elif op == 'or': 124 | return tot or num 125 | 126 | 127 | # helper functions for barrier statement 128 | class _Barrier: # reusable two-phase barrier built from semaphores 129 | def __init__(self, n): 130 | self.n = n 131 | self.count = 0 132 | self.mutex = threading.Semaphore(1) 133 | self.barrier = threading.Semaphore(0) 134 |
self.barrier2 = threading.Semaphore(1) 135 | 136 | def wait(self): 137 | self.mutex.acquire() 138 | self.count += 1 139 | if self.count == self.n: 140 | self.barrier2.acquire() 141 | self.barrier.release() 142 | self.mutex.release() 143 | 144 | self.barrier.acquire() 145 | self.barrier.release() 146 | 147 | self.mutex.acquire() 148 | self.count -= 1 149 | if self.count == 0: 150 | self.barrier.acquire() 151 | self.barrier2.release() 152 | self.mutex.release() 153 | 154 | self.barrier2.acquire() 155 | self.barrier2.release() 156 | 157 | 158 | class _Do_Nothing_Barrier: 159 | def wait(self): 160 | pass 161 | 162 | 163 | _barrier = _Do_Nothing_Barrier() 164 | 165 | 166 | def barrier(): 167 | global _barrier 168 | _barrier.wait() 169 | 170 | 171 | # helper functions for dynamic for 172 | _iter_item = None 173 | _loop_lock = init_lock() 174 | def drange(start, stop=None, step=1): # dynamic schedule: each thread pulls the next index under a lock 175 | global _iter_item, _loop_lock 176 | if stop is None: 177 | stop = start 178 | start = 0 179 | if step == 0: 180 | raise ValueError('step can not be zero.') 181 | set_lock(_loop_lock) 182 | if _iter_item is None: 183 | _iter_item = start 184 | unset_lock(_loop_lock) 185 | while _iter_item < stop: 186 | set_lock(_loop_lock) 187 | tmp_iter_item = _iter_item 188 | _iter_item += step 189 | unset_lock(_loop_lock) 190 | if tmp_iter_item >= stop: 191 | break 192 | yield tmp_iter_item 193 | barrier() 194 | set_lock(_loop_lock) 195 | if _iter_item is not None: 196 | _iter_item = None 197 | unset_lock(_loop_lock) 198 | 199 | 200 | def dlist(iter_list): # dynamic schedule over a list's elements 201 | global _iter_item, _loop_lock 202 | l = list(iter_list) 203 | m = len(l) 204 | set_lock(_loop_lock) 205 | if _iter_item is None: 206 | _iter_item = 0 207 | unset_lock(_loop_lock) 208 | while _iter_item < m: 209 | set_lock(_loop_lock) 210 | tmp_iter_item = _iter_item 211 | _iter_item += 1 212 | unset_lock(_loop_lock) 213 | if tmp_iter_item >= m: 214 | break 215 | yield l[tmp_iter_item] 216 | barrier() 217 | set_lock(_loop_lock) 218 | if _iter_item is not None: 219 | _iter_item = None 220 | unset_lock(_loop_lock) 221 | 222 | 223 | # helper functions for parallel statement 224 | def parallel_run(func, nthrds=None): 225 | global _barrier 226 | tmp_num_threads = get_num_threads() 227 | if nthrds is None or nthrds == 0: 228 | nthrds = cpu_count() 229 | set_num_threads(nthrds) 230 | _barrier = _Barrier(nthrds) 231 | threads = [] 232 | for i in xrange(get_num_threads()): 233 | threads.append(threading.Thread(target=func, name=str(i))) 234 | for t in threads: 235 | t.start() 236 | for t in threads: 237 | t.join() 238 | set_num_threads(tmp_num_threads) 239 | _barrier = _Do_Nothing_Barrier() 240 | 241 | -------------------------------------------------------------------------------- /other-examples-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(3) 3 | import time 4 | import random 5 | def hello_world(): 6 | _dict1={} 7 | print "i write dazuoye!!!"
8 | print omp.get_thread_num(), '/', omp.get_num_threads() 9 | _dict1['a'] = 2017 10 | # omp parallel num_threads(4) 11 | def _block0(): 12 | print "i love bianyishixi!\n", 13 | print '%d / %d\n' % (omp.get_thread_num(), omp.get_num_threads()), 14 | print "a = " + str(_dict1['a']) +'\n', 15 | # omp parallel end 16 | omp.parallel_run(_block0,4) 17 | hello_world() 18 | num_step = 100000 19 | step = 1.0 / num_step 20 | def calc_pi(): 21 | _dict2={} 22 | _dict2['ans'] = [0] * 8 23 | # omp parallel num_threads(8) private(i,tid,nthrds,tmp_ans,x) 24 | def _block1(): 25 | tid = omp.get_thread_num() 26 | nthrds = omp.get_num_threads() 27 | print "working: " + str(tid) + ' / ' + str(nthrds) + '\n', 28 | tmp_ans = 0 29 | for i in xrange(tid, num_step, nthrds): 30 | x = (i + 0.5) * step 31 | tmp_ans += 4.0 / (1.0 + x * x) 32 | _dict2['ans'][tid] = tmp_ans 33 | # omp parallel end 34 | omp.parallel_run(_block1,8) 35 | print sum(_dict2['ans']) * step 36 | calc_pi() 37 | def calc_pi_for(): 38 | _dict3={} 39 | _dict3['ans'] = 0 40 | _dict3['lock'] = omp.init_lock() 41 | # omp parallel num_threads(8) private(i,x) 42 | def _block2(): 43 | # omp for reduction(+:ans) 44 | OMP_REDUCTION_VAR_0_0 = omp.reduction_init('+') 45 | for i in omp.prange(num_step): 46 | x = (i + 0.5) * step 47 | OMP_REDUCTION_VAR_0_0 += 4.0 / (1.0 + x * x) 48 | omp.set_internal_lock(0) 49 | _dict3['ans'] = omp.reduction('+',_dict3['ans'],OMP_REDUCTION_VAR_0_0) 50 | omp.unset_internal_lock(0) 51 | omp.barrier() 52 | # omp parallel end 53 | omp.parallel_run(_block2,8) 54 | print _dict3['ans'] * step 55 | calc_pi_for() 56 | def calc_pi_critical(): 57 | _dict4={} 58 | _dict4['ans'] = 0 59 | _dict4['lock'] = omp.init_lock() 60 | # omp parallel num_threads(8) private(i,x) 61 | def _block3(): 62 | # omp for 63 | for i in omp.prange(num_step): 64 | x = (i + 0.5) * step 65 | # omp critical 66 | omp.set_internal_lock(2) 67 | _dict4['ans'] += 4.0 / (1.0 + x * x) 68 | omp.unset_internal_lock(2) 69 | omp.barrier() 70 | # omp critical end 71 | # omp parallel end 72 | omp.parallel_run(_block3,8) 73 | print _dict4['ans'] * step 74 | calc_pi_critical() 75 | def sections_test(): 76 | _dict5={} 77 | # omp parallel num_threads(2) 78 | def _block4(): 79 | # omp sections 80 | for OMP_SECTIONS_ID in omp.prange(2): 81 | # omp section 82 | if OMP_SECTIONS_ID == 0: 83 | print 'section 0 from ' + str(omp.get_thread_num()) + '\n', 84 | # omp section end 85 | # omp section 86 | if OMP_SECTIONS_ID == 1: 87 | print 'section 1 from ' + str(omp.get_thread_num()) + '\n', 88 | # omp section end 89 | # omp sections end 90 | omp.barrier() 91 | # omp parallel end 92 | omp.parallel_run(_block4,2) 93 | sections_test() 94 | def barrier_test(): 95 | _dict6={} 96 | # omp parallel 97 | def _block5(): 98 | print str(omp.get_thread_num()) + ' a\n', 99 | time.sleep(random.randrange(3)) 100 | # omp barrier 101 | omp.barrier() 102 | print str(omp.get_thread_num()) + ' b\n', 103 | time.sleep(random.randrange(3)) 104 | # omp barrier 105 | omp.barrier() 106 | print str(omp.get_thread_num()) + ' c\n', 107 | # omp parallel end 108 | omp.parallel_run(_block5,0) 109 | barrier_test() 110 | pass 111 | 112 | -------------------------------------------------------------------------------- /other-examples.py: -------------------------------------------------------------------------------- 1 | import omp 2 | import time 3 | import random 4 | 5 | 6 | def hello_world(): 7 | print "i write dazuoye!!!" 
8 | print omp.get_thread_num(), '/', omp.get_num_threads() 9 | a = 2017 10 | # omp parallel num_threads(4) 11 | print "i love bianyishixi!\n", 12 | print '%d / %d\n' % (omp.get_thread_num(), omp.get_num_threads()), 13 | print "a = " + str(a) +'\n', 14 | # omp parallel end 15 | 16 | 17 | hello_world() 18 | 19 | num_step = 100000 20 | step = 1.0 / num_step 21 | 22 | def calc_pi(): 23 | ans = [0] * 8 24 | # omp parallel num_threads(8) private(i,tid,nthrds,tmp_ans,x) 25 | tid = omp.get_thread_num() 26 | nthrds = omp.get_num_threads() 27 | print "working: " + str(tid) + ' / ' + str(nthrds) + '\n', 28 | tmp_ans = 0 29 | for i in xrange(tid, num_step, nthrds): 30 | x = (i + 0.5) * step 31 | tmp_ans += 4.0 / (1.0 + x * x) 32 | ans[tid] = tmp_ans 33 | # omp parallel end 34 | print sum(ans) * step 35 | 36 | 37 | calc_pi() 38 | 39 | 40 | def calc_pi_for(): 41 | ans = 0 42 | lock = omp.init_lock() 43 | # omp parallel num_threads(8) private(i,x) 44 | # omp for reduction(+:ans) 45 | for i in xrange(num_step): 46 | x = (i + 0.5) * step 47 | ans += 4.0 / (1.0 + x * x) 48 | # omp parallel end 49 | print ans * step 50 | 51 | 52 | calc_pi_for() 53 | 54 | 55 | def calc_pi_critical(): 56 | ans = 0 57 | lock = omp.init_lock() 58 | # omp parallel num_threads(8) private(i,x) 59 | # omp for 60 | for i in xrange(num_step): 61 | x = (i + 0.5) * step 62 | # omp critical 63 | ans += 4.0 / (1.0 + x * x) 64 | # omp critical end 65 | # omp parallel end 66 | print ans * step 67 | 68 | 69 | calc_pi_critical() 70 | 71 | 72 | def sections_test(): 73 | # omp parallel num_threads(2) 74 | # omp sections 75 | # omp section 76 | print 'section 0 from ' + str(omp.get_thread_num()) + '\n', 77 | # omp section end 78 | # omp section 79 | print 'section 1 from ' + str(omp.get_thread_num()) + '\n', 80 | # omp section end 81 | # omp sections end 82 | # omp parallel end 83 | 84 | 85 | sections_test() 86 | 87 | 88 | def barrier_test(): 89 | # omp parallel 90 | print str(omp.get_thread_num()) + ' a\n', 91 | time.sleep(random.randrange(3)) 92 | # omp barrier 93 | print str(omp.get_thread_num()) + ' b\n', 94 | time.sleep(random.randrange(3)) 95 | # omp barrier 96 | print str(omp.get_thread_num()) + ' c\n', 97 | # omp parallel end 98 | 99 | barrier_test() -------------------------------------------------------------------------------- /pyomp: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # usage: ./pyomp SOURCE.py OUTPUT.py (run from the repository root so the ./lib and ./tmp paths resolve) 3 | python2.7 ./lib/cpu_count.py > cpu_count.txt 4 | python2.7 ./lib/delete_empty_lines.py $1 > ./tmp/no_blank_line.py 5 | python2.7 ./lib/parse_omp.py ./tmp/no_blank_line.py > ./tmp/log_omp.txt 6 | python2.7 ./lib/tree.py ./tmp/no_blank_line.py > ./tmp/log_code.txt 7 | ./lib/transform ./tmp/no_blank_line.py ./tmp/log_code.txt ./tmp/log_omp.txt $2 2>/dev/null 8 | -------------------------------------------------------------------------------- /std_test/MatrixMultiple-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(1) 3 | import random 4 | def genMatrix(n): 5 | _dict1={} 6 | _dict1['n']=n 7 | return [random.randrange(_dict1['n']) for i in range(_dict1['n']*_dict1['n'])] 8 | def matrixMul(N, a, b): 9 | _dict2={} 10 | _dict2['N']=N 11 | _dict2['a']=a 12 | _dict2['b']=b 13 | _dict2['res'] = [0 for i in range(_dict2['N']*_dict2['N'])] 14 | # omp parallel num_threads(2) private(n,i,j,tmp,k) 15 | def _block0(): 16 | # omp for 17 | for n in omp.prange(_dict2['N']*_dict2['N']): 18 | i = n / _dict2['N'] 19 | j = n %
_dict2['N'] 20 | tmp = 0 21 | for k in range(_dict2['N']): 22 | tmp = tmp + _dict2['a'][i*_dict2['N']+k] * _dict2['b'][k*_dict2['N']+j] 23 | _dict2['res'][n] = tmp 24 | omp.barrier() 25 | # omp parallel end 26 | omp.parallel_run(_block0,2) 27 | return _dict2['res'] 28 | n = 3 29 | a = genMatrix(n) 30 | b = genMatrix(n) 31 | print a 32 | print b 33 | print matrixMul(n, a, b) 34 | pass 35 | 36 | -------------------------------------------------------------------------------- /std_test/MatrixMultiple.py: -------------------------------------------------------------------------------- 1 | import omp 2 | import random 3 | 4 | def genMatrix(n): 5 | return [random.randrange(n) for i in range(n*n)] 6 | 7 | def matrixMul(N, a, b): 8 | res = [0 for i in range(N*N)] 9 | # omp parallel num_threads(2) private(n,i,j,tmp,k) 10 | # omp for 11 | for n in range(N*N): 12 | i = n / N 13 | j = n % N 14 | tmp = 0 15 | for k in range(N): 16 | tmp = tmp + a[i*N+k] * b[k*N+j] 17 | res[n] = tmp 18 | # omp parallel end 19 | return res 20 | 21 | n = 3 22 | a = genMatrix(n) 23 | b = genMatrix(n) 24 | print a 25 | print b 26 | print matrixMul(n, a, b) 27 | -------------------------------------------------------------------------------- /std_test/adds-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(1) 3 | def count(n): 4 | _dict1={} 5 | _dict1['n']=n 6 | _dict1['s'] = 0 7 | #omp parallel private(i) num_threads(2) 8 | def _block0(): 9 | if omp.get_thread_num()==0: 10 | print 'num_threads =', omp.get_num_threads() 11 | #omp for reduction(+:s) 12 | OMP_REDUCTION_VAR_0 = omp.reduction_init('+') 13 | for i in omp.prange(_dict1['n']): 14 | OMP_REDUCTION_VAR_0 += i 15 | omp.set_internal_lock(0) 16 | _dict1['s'] = omp.reduction('+',_dict1['s'],OMP_REDUCTION_VAR_0) 17 | omp.unset_internal_lock(0) 18 | omp.barrier() 19 | #omp parallel end 20 | omp.parallel_run(_block0,2) 21 | return _dict1['s'] 22 | print count(500000000) 23 | pass 24 | 25 | -------------------------------------------------------------------------------- /std_test/adds.py: -------------------------------------------------------------------------------- 1 | import omp 2 | 3 | def count(n): 4 | s = 0 5 | #omp parallel private(i) num_threads(2) 6 | if omp.get_thread_num()==0: 7 | print 'num_threads =', omp.get_num_threads() 8 | #omp for reduction(+:s) 9 | for i in xrange(n): 10 | s += i 11 | #omp parallel end 12 | return s 13 | print count(500000000) 14 | -------------------------------------------------------------------------------- /std_test/pi-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(1) 3 | import time 4 | import random 5 | num_step = 300000000 6 | step = 1.0 / num_step 7 | def calc_pi_for(): 8 | _dict1={} 9 | _dict1['ans'] = 0 10 | # omp parallel num_threads(2) private(i,x) 11 | def _block0(): 12 | # omp for reduction(+:ans) 13 | OMP_REDUCTION_VAR_0 = omp.reduction_init('+') 14 | for i in omp.prange(num_step): 15 | x = (i + 0.5) * step 16 | OMP_REDUCTION_VAR_0 += 4.0 / (1.0 + x * x) 17 | omp.set_internal_lock(0) 18 | _dict1['ans'] = omp.reduction('+',_dict1['ans'],OMP_REDUCTION_VAR_0) 19 | omp.unset_internal_lock(0) 20 | omp.barrier() 21 | # omp parallel end 22 | omp.parallel_run(_block0,2) 23 | print "%.9f\n" % (_dict1['ans'] * step), 24 | calc_pi_for() 25 | pass 26 | 27 | -------------------------------------------------------------------------------- /std_test/pi.py: 
-------------------------------------------------------------------------------- 1 | import omp 2 | 3 | num_step = 300000000 4 | step = 1.0 / num_step 5 | 6 | def calc_pi_for(): 7 | ans = 0 8 | # omp parallel num_threads(2) private(i,x) 9 | # omp for reduction(+:ans) 10 | for i in xrange(num_step): 11 | x = (i + 0.5) * step 12 | ans += 4.0 / (1.0 + x * x) 13 | # omp parallel end 14 | print "%.10f\n" % (ans * step), 15 | 16 | calc_pi_for() 17 | 18 | 19 | -------------------------------------------------------------------------------- /std_test/test_barrier-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(0) 3 | import time 4 | import random 5 | def barrier_test(): 6 | _dict1={} 7 | # omp parallel 8 | def _block0(): 9 | print str(omp.get_thread_num()) + ' a\n', 10 | time.sleep(random.randrange(3)) 11 | print str(omp.get_thread_num()) + ' b\n', 12 | time.sleep(random.randrange(3)) 13 | # omp barrier 14 | omp.barrier() 15 | print str(omp.get_thread_num()) + ' c\n', 16 | # omp parallel end 17 | omp.parallel_run(_block0,0) 18 | barrier_test() 19 | pass 20 | 21 | -------------------------------------------------------------------------------- /std_test/test_barrier.py: -------------------------------------------------------------------------------- 1 | import omp 2 | import time 3 | import random 4 | 5 | def barrier_test(): 6 | # omp parallel 7 | print str(omp.get_thread_num()) + ' a\n', 8 | time.sleep(random.randrange(3)) 9 | print str(omp.get_thread_num()) + ' b\n', 10 | time.sleep(random.randrange(3)) 11 | # omp barrier 12 | print str(omp.get_thread_num()) + ' c\n', 13 | # omp parallel end 14 | 15 | barrier_test() -------------------------------------------------------------------------------- /std_test/test_for_critical_nowait-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(2) 3 | num_step = 1000000 4 | step = 1.0 / num_step 5 | def calc_pi_critical(): 6 | _dict1={} 7 | _dict1['ans'] = 0 8 | # omp parallel num_threads(8) private(i,x) 9 | def _block0(): 10 | # omp for nowait 11 | for i in omp.prange(num_step): 12 | x = (i + 0.5) * step 13 | # omp critical 14 | omp.set_internal_lock(1) 15 | _dict1['ans'] += 4.0 / (1.0 + x * x) 16 | omp.unset_internal_lock(1) 17 | # omp critical end 18 | # omp parallel end 19 | omp.parallel_run(_block0,8) 20 | print _dict1['ans'] * step 21 | calc_pi_critical() 22 | pass 23 | 24 | -------------------------------------------------------------------------------- /std_test/test_for_critical_nowait.py: -------------------------------------------------------------------------------- 1 | import omp 2 | 3 | num_step = 1000000 4 | step = 1.0 / num_step 5 | 6 | def calc_pi_critical(): 7 | ans = 0 8 | # omp parallel num_threads(8) private(i,x) 9 | # omp for nowait 10 | for i in range(num_step): 11 | x = (i + 0.5) * step 12 | # omp critical 13 | ans += 4.0 / (1.0 + x * x) 14 | # omp critical end 15 | # omp parallel end 16 | print ans * step 17 | 18 | calc_pi_critical() -------------------------------------------------------------------------------- /std_test/test_for_reduce_dynamic-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(1) 3 | num_step = 1000000 4 | step = 1.0 / num_step 5 | def calc_pi_for(): 6 | _dict1={} 7 | _dict1['ans'] = 0 8 | # omp parallel num_threads(8) private(i,x) 9 | def _block0(): 10 | # 
omp for reduction(+:ans) schedule(dynamic) 11 | OMP_REDUCTION_VAR_0_0 = omp.reduction_init('+') 12 | for i in omp.drange(num_step): 13 | x = (i + 0.5) * step 14 | OMP_REDUCTION_VAR_0_0 += 4.0 / (1.0 + x * x) 15 | omp.set_internal_lock(0) 16 | _dict1['ans'] = omp.reduction('+',_dict1['ans'],OMP_REDUCTION_VAR_0_0) 17 | omp.unset_internal_lock(0) 18 | omp.barrier() 19 | # omp parallel end 20 | omp.parallel_run(_block0,8) 21 | print _dict1['ans'] * step 22 | calc_pi_for() 23 | pass 24 | 25 | -------------------------------------------------------------------------------- /std_test/test_for_reduce_dynamic.py: -------------------------------------------------------------------------------- 1 | import omp 2 | 3 | num_step = 1000000 4 | step = 1.0 / num_step 5 | 6 | def calc_pi_for(): 7 | ans = 0 8 | # omp parallel num_threads(8) private(i,x) 9 | # omp for reduction(+:ans) schedule(dynamic) 10 | for i in range(num_step): 11 | x = (i + 0.5) * step 12 | ans += 4.0 / (1.0 + x * x) 13 | # omp parallel end 14 | print ans * step 15 | 16 | calc_pi_for() -------------------------------------------------------------------------------- /std_test/test_haswait-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(1) 3 | import time 4 | import random 5 | def haswait_test(): 6 | _dict1={} 7 | # omp parallel num_threads(8) private(i) 8 | def _block0(): 9 | # omp for 10 | for i in omp.prange(8): 11 | time.sleep(random.randrange(3)) 12 | print str(omp.get_thread_num()) + ' thread\n', 13 | omp.barrier() 14 | print "done" 15 | # omp parallel end 16 | omp.parallel_run(_block0,8) 17 | haswait_test() 18 | pass 19 | 20 | -------------------------------------------------------------------------------- /std_test/test_haswait.py: -------------------------------------------------------------------------------- 1 | import omp 2 | import time 3 | import random 4 | 5 | def haswait_test(): 6 | # omp parallel num_threads(8) private(i) 7 | # omp for 8 | for i in range(8): 9 | time.sleep(random.randrange(3)) 10 | print str(omp.get_thread_num()) + ' thread\n', 11 | print "done" 12 | # omp parallel end 13 | 14 | haswait_test() -------------------------------------------------------------------------------- /std_test/test_nowait-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(1) 3 | import time 4 | import random 5 | def nowait_test(): 6 | _dict1={} 7 | # omp parallel num_threads(8) private(i) 8 | def _block0(): 9 | # omp for nowait 10 | for i in omp.prange(8): 11 | time.sleep(random.randrange(3)) 12 | print str(omp.get_thread_num()) + ' thread\n', 13 | print "done" 14 | # omp parallel end 15 | omp.parallel_run(_block0,8) 16 | nowait_test() 17 | pass 18 | 19 | -------------------------------------------------------------------------------- /std_test/test_nowait.py: -------------------------------------------------------------------------------- 1 | import omp 2 | import time 3 | import random 4 | 5 | def nowait_test(): 6 | # omp parallel num_threads(8) private(i) 7 | # omp for nowait 8 | for i in range(8): 9 | time.sleep(random.randrange(3)) 10 | print str(omp.get_thread_num()) + ' thread\n', 11 | print "done" 12 | # omp parallel end 13 | 14 | nowait_test() -------------------------------------------------------------------------------- /std_test/test_parallel-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 
| omp.set_num_of_internal_locks(0) 3 | def hello_world(): 4 | _dict1={} 5 | print "i write dazuoye!!!" 6 | print omp.get_thread_num(),'/',omp.get_num_threads() 7 | _dict1['a'] = 2017 8 | #omp parallel num_threads(4) 9 | def _block0(): 10 | print "i love bianyishixi!" 11 | print omp.get_thread_num(),'/',omp.get_num_threads() 12 | print "a =", _dict1['a'] 13 | #omp parallel end 14 | omp.parallel_run(_block0,4) 15 | hello_world() 16 | pass 17 | 18 | -------------------------------------------------------------------------------- /std_test/test_parallel.py: -------------------------------------------------------------------------------- 1 | import omp 2 | 3 | def hello_world(): 4 | print "i write dazuoye!!!" 5 | print omp.get_thread_num(),'/',omp.get_num_threads() 6 | a = 2017 7 | #omp parallel num_threads(4) 8 | print "i love bianyishixi!" 9 | print omp.get_thread_num(),'/',omp.get_num_threads() 10 | print "a =", a 11 | #omp parallel end 12 | 13 | hello_world() -------------------------------------------------------------------------------- /std_test/test_sections-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(0) 3 | def sections_test(): 4 | _dict1={} 5 | # omp parallel num_threads(2) 6 | def _block0(): 7 | # omp sections 8 | for OMP_SECTIONS_ID in omp.prange(2): 9 | # omp section 10 | if OMP_SECTIONS_ID == 0: 11 | print 'section 0 from ' + str(omp.get_thread_num()) + '\n', 12 | # omp section end 13 | # omp section 14 | if OMP_SECTIONS_ID == 1: 15 | print 'section 1 from ' + str(omp.get_thread_num()) + '\n', 16 | # omp section end 17 | # omp sections end 18 | omp.barrier() 19 | # omp parallel end 20 | omp.parallel_run(_block0,2) 21 | sections_test() 22 | pass 23 | 24 | -------------------------------------------------------------------------------- /std_test/test_sections.py: -------------------------------------------------------------------------------- 1 | import omp 2 | 3 | def sections_test(): 4 | # omp parallel num_threads(2) 5 | # omp sections 6 | # omp section 7 | print 'section 0 from ' + str(omp.get_thread_num()) + '\n', 8 | # omp section end 9 | # omp section 10 | print 'section 1 from ' + str(omp.get_thread_num()) + '\n', 11 | # omp section end 12 | # omp sections end 13 | # omp parallel end 14 | 15 | sections_test() -------------------------------------------------------------------------------- /std_test/test_variable-out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(0) 3 | class MyClass: 4 | i = 12345 5 | def f(self): 6 | _dict1={} 7 | _dict1['self']=self 8 | return 'hello world' 9 | tmp = MyClass() 10 | print tmp.f() 11 | c = 1 12 | f = 2 13 | def func(a,b,*d,**e): 14 | _dict2={} 15 | _dict2['a']=a 16 | _dict2['b']=b 17 | _dict2['d']=d 18 | _dict2['e']=e 19 | global c,f 20 | return _dict2['a']+_dict2['b']+c+f 21 | print func(3,4,None,None) 22 | add2 = lambda x,y:x+y 23 | print add2(1,2) 24 | l = [2*i for i in range(10) if i>0] 25 | print l 26 | a = 4 27 | def f(): 28 | _dict3={} 29 | _dict3['a'] = 2 30 | def g(): 31 | _dict4={} 32 | _dict4['b'] = _dict3['a'] 33 | return _dict4['b'] 34 | return g() 35 | print f() 36 | pass 37 | 38 | -------------------------------------------------------------------------------- /std_test/test_variable.py: -------------------------------------------------------------------------------- 1 | import omp 2 | 3 | class MyClass: 4 | i = 12345 5 | def f(self): 6 | return 
'hello world' 7 | tmp = MyClass() 8 | print tmp.f() 9 | 10 | c = 1 11 | f = 2 12 | def func(a,b,*d,**e): 13 | global c,f 14 | return a+b+c+f 15 | print func(3,4,None,None) 16 | 17 | add2 = lambda x,y:x+y 18 | print add2(1,2) 19 | 20 | l = [2*i for i in range(10) if i>0] 21 | print l 22 | 23 | a = 4 24 | def f(): 25 | a = 2 26 | def g(): 27 | b = a 28 | return b 29 | return g() 30 | print f() -------------------------------------------------------------------------------- /tests/MatrixMultiple.py: -------------------------------------------------------------------------------- 1 | import omp 2 | import random 3 | 4 | def genMatrix(n): 5 | return [random.randrange(n) for i in range(n*n)] 6 | 7 | def matrixMul(N, a, b): 8 | res = [0 for i in range(N*N)] 9 | # omp parallel num_threads(2) private(n,i,j,tmp,k) 10 | # omp for 11 | for n in range(N*N): 12 | i = n / N 13 | j = n % N 14 | tmp = 0 15 | for k in range(N): 16 | tmp = tmp + a[i*N+k] * b[k*N+j] 17 | res[n] = tmp 18 | # omp parallel end 19 | return res 20 | 21 | n = 3 22 | a = genMatrix(n) 23 | b = genMatrix(n) 24 | print a 25 | print b 26 | print matrixMul(n, a, b) 27 | -------------------------------------------------------------------------------- /tests/adds.py: -------------------------------------------------------------------------------- 1 | import omp 2 | 3 | def count(n): 4 | s = 0 5 | #omp parallel private(i) num_threads(4) 6 | if omp.get_thread_num()==0: 7 | print 'num_threads =', omp.get_num_threads() 8 | #omp for reduction(+:s) 9 | for i in xrange(n): 10 | s += i 11 | #omp parallel end 12 | return s 13 | print count(500000000) 14 | -------------------------------------------------------------------------------- /tests/list_test.py: -------------------------------------------------------------------------------- 1 | def prange(start, stop=None, step=1, id=0, nthrds=1): 2 | if stop is None: 3 | stop = start 4 | start = 0 5 | if step == 0: 6 | raise ValueError('step can not be zero.') 7 | return xrange(start+id*step, stop, nthrds*step) 8 | 9 | def plist(iter_list, id=0, nthrds=1): 10 | l = list(iter_list) 11 | return l[id::nthrds] 12 | 13 | nthrds = 5 14 | 15 | 16 | for i in range(nthrds): 17 | print list(plist(range(20),id=i, nthrds=nthrds)) 18 | 19 | 20 | -------------------------------------------------------------------------------- /tests/omp_lib_test.py: -------------------------------------------------------------------------------- 1 | import omp 2 | import time 3 | import random 4 | 5 | def f(): 6 | for i in omp.drange(10): 7 | print str(i)+' '+str(omp.get_thread_num())+'\n', 8 | time.sleep(omp.get_thread_num()) 9 | 10 | omp.parallel_run(f,4) 11 | 12 | -------------------------------------------------------------------------------- /tests/other-examples.py: -------------------------------------------------------------------------------- 1 | import omp 2 | import time 3 | import random 4 | 5 | 6 | def hello_world(): 7 | print "i write dazuoye!!!"
8 | print omp.get_thread_num(), '/', omp.get_num_threads() 9 | a = 2017 10 | # omp parallel num_threads(4) 11 | print "i love bianyishixi!\n", 12 | print '%d / %d\n' % (omp.get_thread_num(), omp.get_num_threads()), 13 | print "a = " + str(a) +'\n', 14 | # omp parallel end 15 | 16 | 17 | hello_world() 18 | 19 | num_step = 100000 20 | step = 1.0 / num_step 21 | 22 | def calc_pi(): 23 | ans = [0] * 8 24 | # omp parallel num_threads(8) private(i,tid,nthrds,tmp_ans,x) 25 | tid = omp.get_thread_num() 26 | nthrds = omp.get_num_threads() 27 | print "working: " + str(tid) + ' / ' + str(nthrds) + '\n', 28 | tmp_ans = 0 29 | for i in xrange(tid, num_step, nthrds): 30 | x = (i + 0.5) * step 31 | tmp_ans += 4.0 / (1.0 + x * x) 32 | ans[tid] = tmp_ans 33 | # omp parallel end 34 | print sum(ans) * step 35 | 36 | 37 | calc_pi() 38 | 39 | 40 | def calc_pi_for(): 41 | ans = 0 42 | lock = omp.init_lock() 43 | # omp parallel num_threads(8) private(i,x) 44 | # omp for reduction(+:ans) 45 | for i in range(num_step): 46 | x = (i + 0.5) * step 47 | ans += 4.0 / (1.0 + x * x) 48 | # omp parallel end 49 | print ans * step 50 | 51 | 52 | calc_pi_for() 53 | 54 | 55 | def calc_pi_critical(): 56 | ans = 0 57 | lock = omp.init_lock() 58 | # omp parallel num_threads(8) private(i,x) 59 | # omp for 60 | for i in range(num_step): 61 | x = (i + 0.5) * step 62 | # omp critical 63 | ans += 4.0 / (1.0 + x * x) 64 | # omp critical end 65 | # omp parallel end 66 | print ans * step 67 | 68 | 69 | calc_pi_critical() 70 | 71 | 72 | def sections_test(): 73 | # omp parallel num_threads(2) 74 | # omp sections 75 | # omp section 76 | print 'section 0 from ' + str(omp.get_thread_num()) + '\n', 77 | # omp section end 78 | # omp section 79 | print 'section 1 from ' + str(omp.get_thread_num()) + '\n', 80 | # omp section end 81 | # omp sections end 82 | # omp parallel end 83 | 84 | 85 | sections_test() 86 | 87 | 88 | def barrier_test(): 89 | # omp parallel 90 | print str(omp.get_thread_num()) + ' a\n', 91 | time.sleep(random.randrange(3)) 92 | # omp barrier 93 | print str(omp.get_thread_num()) + ' b\n', 94 | time.sleep(random.randrange(3)) 95 | # omp barrier 96 | print str(omp.get_thread_num()) + ' c\n', 97 | # omp parallel end 98 | -------------------------------------------------------------------------------- /tests/pi.py: -------------------------------------------------------------------------------- 1 | import omp 2 | 3 | num_step = 300000000 4 | step = 1.0 / num_step 5 | 6 | def calc_pi_for(): 7 | ans = 0 8 | # omp parallel num_threads(4) private(i,x) 9 | # omp for reduction(+:ans) 10 | for i in xrange(num_step): 11 | x = (i + 0.5) * step 12 | ans += 4.0 / (1.0 + x * x) 13 | # omp parallel end 14 | print "%.10f\n" % (ans * step), 15 | 16 | calc_pi_for() 17 | 18 | 19 | -------------------------------------------------------------------------------- /tests/test_program.py: -------------------------------------------------------------------------------- 1 | import omp 2 | 3 | def count(n): 4 | s = 0 5 | #omp parallel private(i) 6 | #omp for reduction(+:s) 7 | for i in xrange(n): 8 | s += i 9 | #omp parallel end 10 | return s 11 | 12 | print count(1000000000) 13 | -------------------------------------------------------------------------------- /tests/test_program_1.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | 3 | class MyClass: 4 | """A simple example class""" 5 | i = 12345 6 | 7 | def f(self): 8 | return 'hello world' 9 | 10 | def f0(): 11 | def f1(): 12 | 
pass 13 | print "hello" 14 | 15 | def func(a,b,*d,**e): 16 | global c,f 17 | return a+b+c+f 18 | 19 | add2 = lambda x,y:x+y 20 | 21 | fig, ax = plt.subplots() 22 | 23 | graph = open("plot-small.txt") 24 | 25 | l = [2*i for i in range(10) if i>0] 26 | 27 | i = 1 28 | 29 | del i 30 | 31 | a = 4 32 | def f(): 33 | a = 2 34 | def g(): 35 | b = a 36 | return b 37 | return g() 38 | 39 | cnt = 0 40 | b = 3 41 | (a,b) = (b,a) 42 | 43 | def hello_world(): 44 | #omp parallel num_threads(4) private(a,b,c) 45 | print "hello_world!" 46 | #omp parallel end 47 | pass 48 | 49 | def hello_world(): 50 | #omp parallel 51 | print "hello_world!" 52 | #omp parallel end 53 | pass 54 | 55 | ''' 56 | for line in graph.readlines(): 57 | points = list(map(float,line.split(' '))) 58 | x = [points[0],points[2]] 59 | y = [points[1],points[3]] 60 | cnt += 1 61 | if cnt%100 == 0: 62 | print(cnt) 63 | ax.plot(x, y, 'r-') 64 | 65 | ax.axis('equal') 66 | plt.show() 67 | 68 | graph.close() 69 | ''' 70 | 71 | def ending(): 72 | pass -------------------------------------------------------------------------------- /tests/test_program_2.py: -------------------------------------------------------------------------------- 1 | import omp 2 | import time 3 | import random 4 | 5 | 6 | def hello_world(): 7 | print "i write dazuoye!!!" 8 | print omp.get_thread_num(), '/', omp.get_num_threads() 9 | a = 2017 10 | # omp parallel num_threads(4) 11 | print "i love bianyishixi!" 12 | print omp.get_thread_num(), '/', omp.get_num_threads() 13 | print "a =", a 14 | # omp parallel end 15 | 16 | 17 | hello_world() 18 | 19 | num_step = 1000000 20 | step = 1.0 / num_step 21 | 22 | 23 | def calc_pi(): 24 | ans = [0] * 8 25 | # omp parallel num_threads(8) private(i,tid,nthrds,tmp_ans,x) 26 | tid = omp.get_thread_num() 27 | nthrds = omp.get_num_threads() 28 | print "working: " + str(tid) + ' / ' + str(nthrds) 29 | tmp_ans = 0 30 | for i in xrange(tid, num_step, nthrds): 31 | x = (i + 0.5) * step 32 | tmp_ans += 4.0 / (1.0 + x * x) 33 | ans[tid] = tmp_ans 34 | # omp parallel end 35 | print sum(ans) * step 36 | 37 | 38 | calc_pi() 39 | 40 | 41 | def calc_pi_for(): 42 | ans = 0 43 | lock = omp.init_lock() 44 | # omp parallel num_threads(8) private(i,x) 45 | # omp for reduction(+:ans) 46 | for i in range(num_step): 47 | x = (i + 0.5) * step 48 | ans += 4.0 / (1.0 + x * x) 49 | # omp parallel end 50 | print ans * step 51 | 52 | 53 | calc_pi_for() 54 | 55 | 56 | def calc_pi_critical(): 57 | ans = 0 58 | lock = omp.init_lock() 59 | # omp parallel num_threads(8) private(i,x) 60 | # omp for 61 | for i in range(num_step): 62 | x = (i + 0.5) * step 63 | # omp critical 64 | ans += 4.0 / (1.0 + x * x) 65 | # omp critical end 66 | # omp parallel end 67 | print ans * step 68 | 69 | 70 | calc_pi_critical() 71 | 72 | 73 | def sections_test(): 74 | # omp parallel num_threads(2) 75 | # omp sections 76 | # omp section 77 | print 'section 0 from ' + str(omp.get_thread_num()) + '\n', 78 | # omp section end 79 | # omp section 80 | print 'section 1 from ' + str(omp.get_thread_num()) + '\n', 81 | # omp section end 82 | # omp sections end 83 | # omp parallel end 84 | 85 | 86 | sections_test() 87 | 88 | 89 | def barrier_test(): 90 | # omp parallel 91 | print str(omp.get_thread_num()) + ' a\n', 92 | time.sleep(random.randrange(3)) 93 | # omp barrier 94 | print str(omp.get_thread_num()) + ' b\n', 95 | time.sleep(random.randrange(3)) 96 | # omp barrier 97 | print str(omp.get_thread_num()) + ' c\n', 98 | # omp parallel end 99 | 
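tests/test_program_2.py above is an annotated input, and tests/test_program_out.py below is a corresponding transformed version. As a minimal sketch of one rewrite in isolation: a critical section becomes a pair of internal-lock calls around the guarded statement (the lock index 0 here is illustrative; in std_test/test_for_critical_nowait-out.py the transform assigned index 1):

    # before (annotated source)
    # omp critical
    ans += 4.0 / (1.0 + x * x)
    # omp critical end

    # after (generated code)
    omp.set_internal_lock(0)
    ans += 4.0 / (1.0 + x * x)
    omp.unset_internal_lock(0)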
-------------------------------------------------------------------------------- /tests/test_program_out.py: -------------------------------------------------------------------------------- 1 | import omp 2 | omp.set_num_of_internal_locks(2) 3 | 4 | def hello_world(): 5 | print "i write dazuoye!!!" 6 | print omp.get_thread_num(),'/',omp.get_num_threads() 7 | a = 2017 8 | #omp parallel num_threads(4) 9 | print "i love bianyishixi!" 10 | print omp.get_thread_num(),'/',omp.get_num_threads() 11 | print "a =", a 12 | #omp parallel end 13 | 14 | hello_world() 15 | 16 | num_step = 10000 17 | step = 1.0/num_step 18 | 19 | def calc_pi(): 20 | ans = [0] * 8 21 | #omp parallel num_threads(8) private(i,tid,nthrds,tmp_ans,x) 22 | tid = omp.get_thread_num() 23 | nthrds = omp.get_num_threads() 24 | print "working: "+str(tid)+' / '+str(nthrds) 25 | tmp_ans = 0 26 | for i in xrange(tid,num_step,nthrds): 27 | x = (i+0.5)*step 28 | tmp_ans += 4.0/(1.0 + x*x) 29 | ans[tid] = tmp_ans 30 | #omp parallel end 31 | print sum(ans)*step 32 | 33 | calc_pi() 34 | 35 | def calc_pi_for(): 36 | ans = 0 37 | lock = omp.init_lock() 38 | tmp_ans = omp.reduction_init('+') 39 | #omp parallel num_threads(8) private(i,x) 40 | for i in omp.prange(num_step): 41 | x = (i+0.5)*step 42 | ans += 4.0/(1.0 + x*x) 43 | omp.set_internal_lock(0) 44 | tmp_ans = omp.reduction('+', tmp_ans, ans) 45 | omp.unset_internal_lock(0) 46 | omp.barrier() 47 | ans = tmp_ans 48 | #omp parallel end 49 | print ans * step 50 | 51 | calc_pi_for() 52 | 53 | def calc_pi_critical(): 54 | ans = 0 55 | lock = omp.init_lock() 56 | #omp parallel num_threads(8) private(i,x) 57 | for i in omp.prange(num_step): 58 | x = (i+0.5)*step 59 | omp.set_internal_lock(1) 60 | ans += 4.0/(1.0 + x*x) 61 | omp.unset_internal_lock(1) 62 | #omp parallel end 63 | print ans * step 64 | 65 | calc_pi_critical() 66 | 67 | -------------------------------------------------------------------------------- /tests/threading_test.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import time 3 | import random 4 | import omp 5 | local_num = 0 6 | def thread_cal(): 7 | global local_num 8 | time.sleep(1) 9 | for _ in range(5): 10 | local_num += 1 11 | time.sleep(random.random()) 12 | print threading.current_thread().getName(), local_num 13 | print omp.get_thread_num(),'/',omp.get_num_threads() 14 | 15 | tmp_num_threads = omp.get_num_threads() 16 | omp.set_num_threads(10) 17 | threads = [] 18 | for i in range(omp.get_num_threads()): 19 | threads.append(threading.Thread(target=thread_cal,name=str(i))) 20 | for t in threads: 21 | t.start() 22 | for t in threads: 23 | t.join() 24 | omp.set_num_threads(tmp_num_threads) 25 | 26 | print threading.current_thread().getName() -------------------------------------------------------------------------------- /tmp/log_code.txt: -------------------------------------------------------------------------------- 1 | function begin hello_world 4 2 | variable 6 10 omp load 3 | variable 6 37 omp load 4 | variable 7 4 a store 5 | variable 10 25 omp load 6 | variable 10 47 omp load 7 | variable 11 19 str load 8 | variable 11 23 a load 9 | function end hello_world 12 10 | variable 13 0 hello_world load 11 | variable 14 0 num_step store 12 | variable 15 0 step store 13 | variable 15 13 num_step load 14 | function begin calc_pi 16 15 | variable 17 4 ans store 16 | variable 19 4 tid store 17 | variable 19 10 omp load 18 | variable 20 4 nthrds store 19 | variable 20 13 omp load 20 | variable 21 24 str load 21 | 
variable 21 28 tid load 22 | variable 21 43 str load 23 | variable 21 47 nthrds load 24 | variable 22 4 tmp_ans store 25 | for begin 23 26 | variable 23 8 i store 27 | variable 23 13 xrange load 28 | variable 23 20 tid load 29 | variable 23 25 num_step load 30 | variable 23 35 nthrds load 31 | variable 24 8 x store 32 | variable 24 13 i load 33 | variable 24 24 step load 34 | variable 25 8 tmp_ans store 35 | variable 25 32 x load 36 | variable 25 36 x load 37 | for end 26 38 | variable 26 4 ans load 39 | variable 26 8 tid load 40 | variable 26 15 tmp_ans load 41 | variable 28 10 sum load 42 | variable 28 14 ans load 43 | variable 28 21 step load 44 | function end calc_pi 29 45 | variable 29 0 calc_pi load 46 | function begin calc_pi_for 30 47 | variable 31 4 ans store 48 | variable 32 4 lock store 49 | variable 32 11 omp load 50 | for begin 35 51 | variable 35 8 i store 52 | variable 35 13 xrange load 53 | variable 35 20 num_step load 54 | variable 36 8 x store 55 | variable 36 13 i load 56 | variable 36 24 step load 57 | variable 37 8 ans store 58 | variable 37 28 x load 59 | variable 37 32 x load 60 | for end 38 61 | variable 39 10 ans load 62 | variable 39 16 step load 63 | function end calc_pi_for 40 64 | variable 40 0 calc_pi_for load 65 | function begin calc_pi_critical 41 66 | variable 42 4 ans store 67 | variable 43 4 lock store 68 | variable 43 11 omp load 69 | for begin 46 70 | variable 46 8 i store 71 | variable 46 13 xrange load 72 | variable 46 20 num_step load 73 | variable 47 8 x store 74 | variable 47 13 i load 75 | variable 47 24 step load 76 | variable 49 8 ans store 77 | variable 49 28 x load 78 | variable 49 32 x load 79 | for end 50 80 | variable 52 10 ans load 81 | variable 52 16 step load 82 | function end calc_pi_critical 53 83 | variable 53 0 calc_pi_critical load 84 | function begin sections_test 54 85 | variable 58 30 str load 86 | variable 58 34 omp load 87 | variable 61 30 str load 88 | variable 61 34 omp load 89 | function end sections_test 62 90 | variable 65 0 sections_test load 91 | function begin barrier_test 66 92 | variable 68 10 str load 93 | variable 68 14 omp load 94 | variable 69 4 time load 95 | variable 69 15 random load 96 | variable 71 10 str load 97 | variable 71 14 omp load 98 | variable 72 4 time load 99 | variable 72 15 random load 100 | variable 74 10 str load 101 | variable 74 14 omp load 102 | function end barrier_test 75 103 | variable 76 0 barrier_test load 104 | -------------------------------------------------------------------------------- /tmp/log_omp.txt: -------------------------------------------------------------------------------- 1 | parallel 8 begin 2 | num_threads 4 3 | parallel 12 end 4 | parallel 18 begin 5 | num_threads 8 6 | private_var i 7 | private_var tid 8 | private_var nthrds 9 | private_var tmp_ans 10 | private_var x 11 | parallel 27 end 12 | parallel 33 begin 13 | num_threads 8 14 | private_var i 15 | private_var x 16 | for 34 17 | reduction + ans 18 | nowait False 19 | scheduling_type static 20 | parallel 38 end 21 | parallel 44 begin 22 | num_threads 8 23 | private_var i 24 | private_var x 25 | for 45 26 | nowait False 27 | scheduling_type static 28 | critical 48 begin 29 | critical 50 end 30 | parallel 51 end 31 | parallel 55 begin 32 | num_threads 2 33 | sections 56 begin 34 | section 57 begin 35 | section 59 end 36 | section 60 begin 37 | section 62 end 38 | sections 63 end 39 | parallel 64 end 40 | parallel 67 begin 41 | num_threads 0 42 | barrier 70 43 | barrier 73 44 | parallel 75 end 45 | 
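The two logs above are the intermediate products that pyomp feeds to lib/transform, both derived from tmp/no_blank_line.py below: tmp/log_code.txt is tree.py's dump of function extents, loops, and variable accesses, and tmp/log_omp.txt is parse_omp.py's digest of the directives. For example, the directive "# omp for reduction(+:ans)" on line 34 of no_blank_line.py produces the four log lines "for 34", "reduction + ans", "nowait False", and "scheduling_type static" above.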
-------------------------------------------------------------------------------- /tmp/no_blank_line.py: -------------------------------------------------------------------------------- 1 | import omp 2 | import time 3 | import random 4 | def hello_world(): 5 | print "i write dazuoye!!!" 6 | print omp.get_thread_num(), '/', omp.get_num_threads() 7 | a = 2017 8 | # omp parallel num_threads(4) 9 | print "i love bianyishixi!\n", 10 | print '%d / %d\n' % (omp.get_thread_num(), omp.get_num_threads()), 11 | print "a = " + str(a) +'\n', 12 | # omp parallel end 13 | hello_world() 14 | num_step = 100000 15 | step = 1.0 / num_step 16 | def calc_pi(): 17 | ans = [0] * 8 18 | # omp parallel num_threads(8) private(i,tid,nthrds,tmp_ans,x) 19 | tid = omp.get_thread_num() 20 | nthrds = omp.get_num_threads() 21 | print "working: " + str(tid) + ' / ' + str(nthrds) + '\n', 22 | tmp_ans = 0 23 | for i in xrange(tid, num_step, nthrds): 24 | x = (i + 0.5) * step 25 | tmp_ans += 4.0 / (1.0 + x * x) 26 | ans[tid] = tmp_ans 27 | # omp parallel end 28 | print sum(ans) * step 29 | calc_pi() 30 | def calc_pi_for(): 31 | ans = 0 32 | lock = omp.init_lock() 33 | # omp parallel num_threads(8) private(i,x) 34 | # omp for reduction(+:ans) 35 | for i in xrange(num_step): 36 | x = (i + 0.5) * step 37 | ans += 4.0 / (1.0 + x * x) 38 | # omp parallel end 39 | print ans * step 40 | calc_pi_for() 41 | def calc_pi_critical(): 42 | ans = 0 43 | lock = omp.init_lock() 44 | # omp parallel num_threads(8) private(i,x) 45 | # omp for 46 | for i in xrange(num_step): 47 | x = (i + 0.5) * step 48 | # omp critical 49 | ans += 4.0 / (1.0 + x * x) 50 | # omp critical end 51 | # omp parallel end 52 | print ans * step 53 | calc_pi_critical() 54 | def sections_test(): 55 | # omp parallel num_threads(2) 56 | # omp sections 57 | # omp section 58 | print 'section 0 from ' + str(omp.get_thread_num()) + '\n', 59 | # omp section end 60 | # omp section 61 | print 'section 1 from ' + str(omp.get_thread_num()) + '\n', 62 | # omp section end 63 | # omp sections end 64 | # omp parallel end 65 | sections_test() 66 | def barrier_test(): 67 | # omp parallel 68 | print str(omp.get_thread_num()) + ' a\n', 69 | time.sleep(random.randrange(3)) 70 | # omp barrier 71 | print str(omp.get_thread_num()) + ' b\n', 72 | time.sleep(random.randrange(3)) 73 | # omp barrier 74 | print str(omp.get_thread_num()) + ' c\n', 75 | # omp parallel end 76 | barrier_test() 77 | pass 78 | --------------------------------------------------------------------------------
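Two usage notes, kept apart from the sources above. First, the end-to-end pipeline: from the repository root (so the relative ./lib and ./tmp paths and cpu_count.txt resolve), run "./pyomp annotated.py generated.py" and then "python2.7 generated.py"; lib/transform is committed prebuilt and can be rebuilt with the makefile in lib/. Second, the omp.py runtime can also be driven directly, without the transform, as tests/omp_lib_test.py does with drange. A minimal sketch under the same assumptions (Python 2.7, with cpu_count.txt present in the working directory since omp.py reads it at import time; the names _block and tid are illustrative):

    import omp

    def _block():
        # inside parallel_run the worker threads are named '0'..'N-1',
        # so get_thread_num() identifies the calling thread
        tid = omp.get_thread_num()
        for i in omp.prange(8):  # static schedule: thread tid gets tid, tid+4
            print str(tid) + ' handles ' + str(i) + '\n',
        omp.barrier()  # a real barrier is installed only while parallel_run is active

    omp.parallel_run(_block, 4)  # run _block on 4 named threads and join them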