├── .gitignore
├── .idea
│   ├── .name
│   ├── dictionaries
│   │   └── zhuohan123.xml
│   ├── inspectionProfiles
│   │   ├── Project_Default.xml
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── openmp-for-python-2.7.iml
│   └── workspace.xml
├── README.md
├── adds-out.py
├── adds.py
├── cpu_count.txt
├── lib
│   ├── Core.cpp
│   ├── Core.h
│   ├── OMP.cpp
│   ├── OMP.h
│   ├── PY.cpp
│   ├── PY.h
│   ├── Toolkit.cpp
│   ├── Toolkit.h
│   ├── cpu_count.py
│   ├── delete_empty_lines.py
│   ├── main.cpp
│   ├── main.h
│   ├── makefile
│   ├── parse_omp.py
│   ├── parseprint.py
│   ├── parseprint.pyc
│   ├── transform
│   └── tree.py
├── omp.py
├── other-examples-out.py
├── other-examples.py
├── pyomp
├── std_test
│   ├── MatrixMultiple-out.py
│   ├── MatrixMultiple.py
│   ├── adds-out.py
│   ├── adds.py
│   ├── pi-out.py
│   ├── pi.py
│   ├── test_barrier-out.py
│   ├── test_barrier.py
│   ├── test_for_critical_nowait-out.py
│   ├── test_for_critical_nowait.py
│   ├── test_for_reduce_dynamic-out.py
│   ├── test_for_reduce_dynamic.py
│   ├── test_haswait-out.py
│   ├── test_haswait.py
│   ├── test_nowait-out.py
│   ├── test_nowait.py
│   ├── test_parallel-out.py
│   ├── test_parallel.py
│   ├── test_sections-out.py
│   ├── test_sections.py
│   ├── test_variable-out.py
│   └── test_variable.py
├── tests
│   ├── MatrixMultiple.py
│   ├── adds.py
│   ├── list_test.py
│   ├── omp_lib_test.py
│   ├── other-examples.py
│   ├── pi.py
│   ├── test_program.py
│   ├── test_program_1.py
│   ├── test_program_2.py
│   ├── test_program_out.py
│   └── threading_test.py
└── tmp
    ├── log_code.txt
    ├── log_omp.txt
    └── no_blank_line.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 |
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 |
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 |
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 |
49 | # Translations
50 | *.mo
51 | *.pot
52 |
53 | # Django stuff:
54 | *.log
55 | local_settings.py
56 |
57 | # Flask stuff:
58 | instance/
59 | .webassets-cache
60 |
61 | # Scrapy stuff:
62 | .scrapy
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # Jupyter Notebook
71 | .ipynb_checkpoints
72 |
73 | # pyenv
74 | .python-version
75 |
76 | # celery beat schedule file
77 | celerybeat-schedule
78 |
79 | # SageMath parsed files
80 | *.sage.py
81 |
82 | # dotenv
83 | .env
84 |
85 | # virtualenv
86 | .venv
87 | venv/
88 | ENV/
89 |
90 | # Spyder project settings
91 | .spyderproject
92 | .spyproject
93 |
94 | # Rope project settings
95 | .ropeproject
96 |
97 | # mkdocs documentation
98 | /site
99 |
100 | # mypy
101 | .mypy_cache/
102 |
103 | # idea
104 | .idea/
105 |
--------------------------------------------------------------------------------
/.idea/.name:
--------------------------------------------------------------------------------
1 | openmp-for-python-2.7
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # OpenMP for Python
2 |
3 | 黎才华,李卓翰
4 |
5 | ## How to Run the Code
6 |
7 | 1. Enter the lib folder and compile with the make command
8 | 2. In the top-level folder, for any Python program x.py, generate the corresponding translated y.py with the command below
9 | 3. Then run y.py with Jython from the top-level folder; the test programs are in std_test
10 |
11 | ~~~bash
12 | ./pyomp x.py y.py
13 | ~~~
14 |
15 |
16 | ## Problem Description
17 |
18 | ### The State of Multithreading in Python
19 |
20 | It is well known that multithreaded parallelism in Python does not work well. One of the reasons is the GIL (Global Interpreter Lock), a mechanism of CPython, the reference implementation of the Python interpreter. Because CPython's memory management is not thread-safe, CPython uses a global lock to ensure that only one thread runs in the interpreter at any time. As a result, Python's multithreading (import threading) is only pseudo-multithreading; multithreaded code can in fact run slower than single-threaded code. Although this is a problem of just one interpreter implementation (others such as Jython do not have it), it is still fairly serious because CPython is so widely used.
21 |
22 | ### OpenMP
23 |
24 | OpenMP is a compiler-directive scheme for parallel programs based on a shared-memory model, supporting C/C++/Fortran. It is simple to use and supports several parallel programming patterns. In terms of implementation, OpenMP first translates the source code into the corresponding Pthreads code, which is then compiled by an ordinary compiler.
25 |
26 | ### Our Problem
27 |
28 | Our goal is to implement a multithreaded, parallel OpenMP for Python, such that a program with directives added can still be interpreted and run even without OpenMP; that is, to translate the code
29 |
30 | ~~~python
31 | #omp parallel num_threads(8)
32 | print "i love bianyishixi"
33 | #omp parallel end
34 | ~~~
35 |
36 | into
37 |
38 | ~~~python
39 | def parallel_module():
40 | print "i love bianyishixi"
41 |
42 | threads = []
43 | for i in range(8):
44 | threads.append(threading.Thread(target=parallel_module))
45 | threads[i].start()
46 | ~~~
47 |
48 | and then hand the translated program to a non-CPython Python interpreter (such as Jython) for execution.
49 |
50 | ## Implementation
51 |
52 | ### Summary of Implemented Directives
53 |
54 | ~~~python
55 | #omp parallel [end] [num_threads(n)] [private(v1, v2, …)]
56 | #omp for [nowait] [reduction(op : v1, v2, …)] [schedule(dynamic/static)]
57 | #omp sections [end]
58 | #omp section [end]
59 | #omp critical [end]
60 | #omp barrier
61 |
62 | omp.get_thread_num()
63 | omp.get_num_threads()
64 | omp.set_num_threads(n)
65 | ~~~
66 |
67 | ### The parallel Directive
68 |
69 | The first half of the project revolved around parallelizing the parallel directive. We mainly exploit the fact that Python allows a function to be defined inside another function: we define a new function in place for the code block that needs to be parallelized, and then append the corresponding calls to the threading library. We use
70 |
71 | ~~~python
72 | #omp parallel
73 | #omp parallel end
74 | ~~~
75 |
76 | to mark the beginning and the end of a parallel block, respectively; then the code
77 |
78 | ~~~python
79 | def f():
80 | print "i write dazuoye"
81 | #omp parallel num_threads(8)
82 | print "i love bianyishixi"
83 | #omp parallel end
84 | ~~~
85 |
86 | is translated into
87 |
88 | ~~~python
89 | def f():
90 | print "i write dazuoye"
91 | def parallel_module():
92 | print "i love bianyishixi"
93 | threads = []
94 | for i in range(8):
95 | threads.append(threading.Thread(target=parallel_module))
96 | for i in range(8):
97 | threads[i].start()
98 | ~~~
99 |
100 | However, this approach has a problem: a variable that is assigned inside the newly defined inner function becomes a local variable of that new function. If we want to modify a local variable of the original function, we have to find a way to refer to the original variable; a naive rewrite therefore changes the scope of variables.
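For instance (an illustrative snippet, not part of the test suite), the inner assignment below creates a new local a inside g instead of updating f's a:

~~~python
def f():
    a = 2
    def g():
        a = 1      # creates a new local `a` inside g; f's `a` is untouched
    g()
    print a        # still prints 2

f()
~~~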
101 |
102 | Since Python objects are divided into mutable and immutable ones, and a mutable object of an outer function can be modified from an inner function, we can avoid the problem above by storing all of a function's local variables in a mutable object (for example a dictionary). For example,
103 |
104 | ~~~python
105 | def f():
106 | a = 2
107 | #omp parallel num_threads(8)
108 | print a
109 | a = 1
110 | #omp parallel end
111 | ~~~
112 |
113 | can be translated into
114 |
115 | ~~~python
116 | def f():
117 | dic_f['a'] = 2
118 | #omp parallel num_threads(8)
119 | print dic_f['a']
120 | dic_f['a'] = 1
121 | #omp parallel end
122 | ~~~
123 |
124 | However, this means we need to know the exact scope of every variable in the original program, so we perform syntactic analysis on the whole program and build an AST.
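The real pipeline logs such variable events from the AST and hands them to the C++ pass in lib/Core.cpp, which consumes records of the form `variable <line> <col> <name> <store/load/del>`. Purely as an illustration of the idea (not the project's actual code), similar events could be gathered with Python's ast module:

~~~python
import ast

class NameLogger(ast.NodeVisitor):
    """Record every variable occurrence with its position and access kind."""
    def __init__(self):
        self.events = []                        # (line, col, name, 'load'/'store'/'del')

    def visit_Name(self, node):
        kind = type(node.ctx).__name__.lower()  # ast.Load / ast.Store / ast.Del
        self.events.append((node.lineno, node.col_offset, node.id, kind))
        self.generic_visit(node)

logger = NameLogger()
logger.visit(ast.parse(open('adds.py').read()))
for line, col, name, kind in logger.events:
    print("variable %d %d %s %s" % (line, col, name, kind))
~~~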
125 |
126 | Since a parallel block may contain variables that are local to each thread (not shared between threads), we implemented the private clause to specify which variables should be treated as private, with the following form:
127 |
128 | ~~~python
129 | #omp parallel private(x, y, z, ...)
130 | ~~~
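In the generated programs, the threading boilerplate shown earlier is wrapped in a single library call, omp.parallel_run(block, n), which appears in the translated test programs below. A minimal sketch of such a helper, assuming it only needs to start the threads and wait for them; the repository's real omp.py presumably also numbers the threads so that get_thread_num()/get_num_threads() work, chooses a default thread count (cf. cpu_count.py), and sets up the shared barrier state:

~~~python
import threading

def parallel_run(block, num_threads):
    """Run `block` in num_threads threads and wait for all of them (sketch only)."""
    threads = [threading.Thread(target=block) for _ in range(num_threads)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
~~~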
131 |
132 |
133 | ### The for Directive
134 |
135 | For the for directive, we handle loops over range/xrange separately from loops over general lists. For a loop over range/xrange, for example:
136 |
137 | ~~~python
138 | #omp for
139 | for i in xrange(b, e, s):
140 | dosth
141 | ~~~
142 |
143 | we implemented a corresponding prange(b, e, s) in the library, which automatically derives from the thread number the xrange of iterations that thread should execute; that is, the code is translated into
144 |
145 | ~~~python
146 | for i in omp.prange(b, e, s):
147 | dosth
148 | ~~~
149 |
150 | Then, supposing the loop range is
151 |
152 | ~~~python
153 | [0, 1, 2, 3]
154 | ~~~
155 |
156 | with two threads, they will respectively execute
157 |
158 | ~~~python
159 | 0: [0, 2]
160 | 1: [1, 3]
161 | ~~~
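Such a static split falls out of the thread number directly: thread k takes every T-th iteration starting at k. A sketch of what omp.prange could look like, assuming the omp.get_thread_num()/omp.get_num_threads() helpers listed above (the library's actual implementation may differ):

~~~python
import omp

def prange(start, stop=None, step=1):
    """Static schedule: thread k takes iterations k, k+T, k+2T, ... (sketch only)."""
    if stop is None:                  # allow both prange(n) and prange(b, e, s)
        start, stop = 0, start
    k = omp.get_thread_num()
    T = omp.get_num_threads()
    return xrange(start + k * step, stop, step * T)
~~~

With two threads, prange(4) yields exactly the split shown above: xrange(0, 4, 2) for thread 0 and xrange(1, 4, 2) for thread 1.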
162 |
163 | For loops over a list, we implemented a corresponding plist function, which lets each thread filter out the part of the sequence it should execute. For example, the loop
164 |
165 | ~~~python
166 | #omp for
167 | for i in ['x', 'y', 'z', 'w']:
168 | dosth
169 | ~~~
170 |
171 | is translated into
172 |
173 | ~~~python
174 | for i in omp.plist(['x', 'y', 'z', 'w']):
175 | dosth
176 | ~~~
177 |
178 | and with two threads, they will respectively execute
179 |
180 | ~~~python
181 | 0: ['x', 'z']
182 | 1: ['y', 'w']
183 | ~~~
184 |
185 | In addition to the static scheduling above, we also support the following dynamic scheduling:
186 |
187 | ~~~python
188 | #omp for schedule(dynamic)
189 | for i in xrange(n):
190 | dosth
191 | ~~~
192 |
193 | In this mode, each thread picks up loop iterations dynamically: when a thread finishes an iteration, it takes the next iteration that has not yet been claimed.
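Dynamic scheduling can be built on a shared counter protected by a lock: whichever thread finishes first claims the next index. A rough sketch of the idea behind omp.drange; the real library would additionally have to share this counter among all threads entering the same loop, which is glossed over here:

~~~python
import threading

class _DynamicRange(object):
    """Hand out loop indices one at a time to whichever thread asks next (sketch only)."""
    def __init__(self, n):
        self.n = n
        self.next = 0
        self.lock = threading.Lock()

    def __iter__(self):
        while True:
            with self.lock:           # claim the next unfinished iteration
                i = self.next
                self.next += 1
            if i >= self.n:
                return                # no work left for this thread
            yield i
~~~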
194 |
195 | Because loops commonly accumulate results, we added the reduction clause:
196 |
197 | ~~~python
198 | #omp for reduction(+: s)
199 | for i in xrange(n):
200 | s += i
201 | ~~~
202 |
203 | It keeps each thread's s independent of the others and performs a single reduction at the end. Concretely, the code above is translated into
204 |
205 | ~~~python
206 | for i in xrange(n):
207 | tmp_s += i
208 | #omp critical
209 | s += tmp_s
210 | #omp critical end
211 | ~~~
212 |
213 | Our reduction supports essentially all the common arithmetic and logical operators, namely:
214 |
215 | ~~~python
216 | +, -, *, max, min, &, |, ^, and, or
217 | ~~~
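In the generated code (see test 3 below), each thread accumulates into a private variable initialised by omp.reduction_init(op) and finally merged, under a lock, via omp.reduction(op, total, partial). A plausible sketch of these two helpers for the operators above; the identities and combine rules in the repository's actual omp.py may differ in detail:

~~~python
_IDENTITY = {
    '+': 0, '-': 0, '*': 1,
    'max': float('-inf'), 'min': float('inf'),
    '&': ~0, '|': 0, '^': 0,
    'and': True, 'or': False,
}

def reduction_init(op):
    """Identity element each thread starts from (sketch only)."""
    return _IDENTITY[op]

def reduction(op, total, partial):
    """Merge one thread's partial result into the running total (sketch only)."""
    if op in ('+', '-'):  return total + partial   # '-' also sums the partials, as in OpenMP
    if op == '*':         return total * partial
    if op == 'max':       return max(total, partial)
    if op == 'min':       return min(total, partial)
    if op == '&':         return total & partial
    if op == '|':         return total | partial
    if op == '^':         return total ^ partial
    if op == 'and':       return total and partial
    if op == 'or':        return total or partial
    raise ValueError('unsupported reduction operator: %r' % op)
~~~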
218 |
219 | ### The sections Directive
220 |
221 | For the following directives
222 |
223 | ~~~python
224 | #omp sections
225 | #omp section
226 | A
227 | #omp section end
228 | #omp section
229 | B
230 | #omp section end
231 | #omp sections end
232 | ~~~
233 |
234 | the translated program lets two threads execute code blocks A and B respectively.
235 |
236 | We translate sections into a for loop and let the for directive do the parallelization; that is, the directives above are translated into
237 |
238 | ~~~python
239 | #omp for
240 | for i in range(2):
241 | if i==0:
242 | A
243 | elif i==1:
244 | B
245 | ~~~
246 |
247 | ### The critical Directive
248 |
249 | The critical directive ensures that the enclosed statements are executed by only one thread at a time; it is implemented with lock and unlock operations. While translating the source program, each new critical directive is assigned a new global lock, which is acquired before the code block and released after it. That is, the code
250 |
251 | ~~~python
252 | #omp critical
253 | x
254 | #omp critical end
255 | ~~~
256 |
257 | is translated into
258 |
259 | ~~~python
260 | omp.set_lock()
261 | x
262 | omp.unset_lock()
263 | ~~~
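omp.set_lock()/omp.unset_lock(), and the indexed omp.set_internal_lock(i)/omp.unset_internal_lock(i) calls seen in the generated test programs below, can be backed by ordinary threading locks. A minimal sketch, assuming the translator announces how many locks it needs via omp.set_num_of_internal_locks(n) at the top of the generated file (as in the examples below):

~~~python
import threading

_global_lock = threading.Lock()        # backs set_lock()/unset_lock()
_internal_locks = []                   # one lock per critical section / reduction

def set_num_of_internal_locks(n):
    global _internal_locks
    _internal_locks = [threading.Lock() for _ in range(n)]

def set_lock():             _global_lock.acquire()
def unset_lock():           _global_lock.release()
def set_internal_lock(i):   _internal_locks[i].acquire()
def unset_internal_lock(i): _internal_locks[i].release()
~~~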
264 |
265 | ### The barrier Directive
266 |
267 | The barrier directive has the form
268 |
269 | ~~~python
270 | #omp barrier
271 | ~~~
272 |
273 | This directive makes all faster threads wait, at a given point, until the slower threads have also reached that point; only then do they all go on to execute the statements below.
274 |
275 | It is implemented with three semaphores (one of which acts as a mutex):
276 |
277 | ~~~python
278 | class _Barrier:
279 | def __init__(self, n):
280 | self.n = n
281 | self.count = 0
282 | self.mutex = threading.Semaphore(1)
283 | self.barrier = threading.Semaphore(0)
284 | self.barrier2 = threading.Semaphore(1)
285 |
286 | def wait(self):
287 | self.mutex.acquire()
288 | self.count += 1
289 | if self.count == self.n:
290 | self.barrier2.acquire()
291 | self.barrier.release()
292 | self.mutex.release()
293 |
294 | self.barrier.acquire()
295 | self.barrier.release()
296 |
297 | self.mutex.acquire()
298 | self.count -= 1
299 | if self.count == 0:
300 | self.barrier.acquire()
301 | self.barrier2.release()
302 | self.mutex.release()
303 |
304 | self.barrier2.acquire()
305 | self.barrier2.release()
306 | ~~~
307 |
308 | In addition, an implicit barrier follows every for and sections block by default; the barrier after a for can be removed explicitly with the nowait clause.
309 |
310 | ## Test Suite Design
311 |
312 | We designed two classes of test sets for different purposes:
313 |
314 | 1. The first class targets the individual OpenMP directives. We first check by hand that the programs before and after compilation are equivalent, and then run them and check that the output is reasonable, in order to verify the correctness of the compiler.
315 | 2. The second class mainly targets the for directive. We apply OpenMP for Python to the summation problem and to the Pi computation problem, the two most classic OpenMP applications, and vary the number of threads while observing performance, to show that our compiler is effective.
316 |
317 | Note: equivalence and correctness are defined as follows: the compiler translates according to the OpenMP directives and the run results match the programmer's intent; they do not mean that the programs before and after compilation produce exactly the same output (which would be neither reasonable nor possible).
318 |
319 | ### The First Class of Tests
320 | The first class of tests focuses on exercising all the provided directives and aims to show, with the simplest possible examples, that our transformations are correct. For each test we therefore list the code before and after compilation and compare the program output. Performance is not considered here; it is covered by the second class of tests.
321 |
322 | #### 0. Variable
323 | This test case contains no OpenMP directives at all, but deciding whether and how each variable should be replaced matters a great deal. The basic rules are: global variables are not replaced; local variables are put into a dictionary and replaced; variables that sit between local and global scope are resolved from the inside out, following the logic of Python 3's nonlocal statement, and then replaced; special local variables, such as those bound in lambdas and comprehensions, and locals declared private, are not replaced.
324 |
325 | Before compilation:
326 |
327 | ~~~python
328 | import omp
329 |
330 | class MyClass:
331 | i = 12345
332 | def f(self):
333 | return 'hello world'
334 | tmp = MyClass()
335 | print tmp.f()
336 |
337 | c = 1
338 | f = 2
339 | def func(a,b,*d,**e):
340 | global c,f
341 | return a+b+c+f
342 | print func(3,4,None,None)
343 |
344 | add2 = lambda x,y:x+y
345 | print add2(1,2)
346 |
347 | l = [2*i for i in range(10) if i>0]
348 | print l
349 |
350 | a = 4
351 | def f():
352 | a = 2
353 | def g():
354 | b = a
355 | return b
356 | return g()
357 | print f()
358 | ~~~
359 |
360 | After compilation:
361 |
362 | ~~~python
363 | import omp
364 | omp.set_num_of_internal_locks(0)
365 | class MyClass:
366 | i = 12345
367 | def f(self):
368 | _dict1={}
369 | _dict1['self']=self
370 | return 'hello world'
371 | tmp = MyClass()
372 | print tmp.f()
373 | c = 1
374 | f = 2
375 | def func(a,b,*d,**e):
376 | _dict2={}
377 | _dict2['a']=a
378 | _dict2['b']=b
379 | _dict2['d']=d
380 | _dict2['e']=e
381 | global c,f
382 | return _dict2['a']+_dict2['b']+c+f
383 | print func(3,4,None,None)
384 | add2 = lambda x,y:x+y
385 | print add2(1,2)
386 | l = [2*i for i in range(10) if i>0]
387 | print l
388 | a = 4
389 | def f():
390 | _dict3={}
391 | _dict3['a'] = 2
392 | def g():
393 | _dict4={}
394 | _dict4['b'] = _dict3['a']
395 | return _dict4['b']
396 | return g()
397 | print f()
398 | pass
399 | ~~~
400 |
401 | Output comparison:
402 |
403 | Before compilation:
404 |
405 | ~~~
406 | hello world
407 | 10
408 | 3
409 | [2, 4, 6, 8, 10, 12, 14, 16, 18]
410 | 2
411 | ~~~
412 |
413 | After compilation:
414 |
415 | ~~~
416 | hello world
417 | 10
418 | 3
419 | [2, 4, 6, 8, 10, 12, 14, 16, 18]
420 | 2
421 | ~~~
422 |
423 | #### 1. Parallel
424 |
425 | parallel is the most basic directive. As described above, it is implemented by defining a new function for the parallel block and invoking it multiple times via threading at the end (wrapped in omp.parallel_run). This directive shows most directly that multiple threads are used. Before compilation:
426 |
427 | ~~~python
428 | import omp
429 |
430 | def hello_world():
431 | print "i write dazuoye!!!"
432 | print omp.get_thread_num(),'/',omp.get_num_threads()
433 | a = 2017
434 | #omp parallel num_threads(4)
435 | print "i love bianyishixi!"
436 | print omp.get_thread_num(),'/',omp.get_num_threads()
437 | print "a =", a
438 | #omp parallel end
439 |
440 | hello_world()
441 | ~~~
442 |
443 | After compilation:
444 |
445 | ~~~python
446 | import omp
447 | omp.set_num_of_internal_locks(0)
448 | def hello_world():
449 | _dict1={}
450 | print "i write dazuoye!!!"
451 | print omp.get_thread_num(),'/',omp.get_num_threads()
452 | _dict1['a'] = 2017
453 | #omp parallel num_threads(4)
454 | def _block0():
455 | print "i love bianyishixi!"
456 | print omp.get_thread_num(),'/',omp.get_num_threads()
457 | print "a =", _dict1['a']
458 | #omp parallel end
459 | omp.parallel_run(_block0,4)
460 | hello_world()
461 | pass
462 | ~~~
463 |
464 | Output comparison:
465 |
466 | Before compilation:
467 |
468 | ~~~
469 | i write dazuoye!!!
470 | 0 / 1
471 | i love bianyishixi!
472 | 0 / 1
473 | a = 2017
474 | ~~~
475 |
476 | After compilation:
477 |
478 | ~~~
479 | i write dazuoye!!!
480 | 0 / 1
481 | i love bianyishixi!
482 | 0 / 4
483 | a = 2017
484 | i love bianyishixi!
485 | 1 / 4
486 | a = 2017
487 | i love bianyishixi!
488 | 2 / 4
489 | a = 2017
490 | i love bianyishixi!
491 | 3 / 4
492 | a = 2017
493 | ~~~
494 |
495 | #### 2. Sections
496 |
497 | The sections directive is implemented with the for machinery (see the next test) combined with if statements; no new function has to be defined in place, and the one-to-one correspondence with the source code is simple and clear. Note that an omp.barrier() is appended at the end by default (explained later).
498 |
499 | Before compilation:
500 |
501 | ~~~python
502 | import omp
503 |
504 | def sections_test():
505 | # omp parallel num_threads(2)
506 | # omp sections
507 | # omp section
508 | print 'section 0 from ' + str(omp.get_thread_num()) + '\n',
509 | # omp section end
510 | # omp section
511 | print 'section 1 from ' + str(omp.get_thread_num()) + '\n',
512 | # omp section end
513 | # omp sections end
514 | # omp parallel end
515 |
516 | sections_test()
517 | ~~~
518 |
519 | After compilation:
520 |
521 | ~~~python
522 | import omp
523 | omp.set_num_of_internal_locks(0)
524 | def sections_test():
525 | _dict1={}
526 | # omp parallel num_threads(2)
527 | def _block0():
528 | # omp sections
529 | for OMP_SECTIONS_ID in omp.prange(2):
530 | # omp section
531 | if OMP_SECTIONS_ID == 0:
532 | print 'section 0 from ' + str(omp.get_thread_num()) + '\n',
533 | # omp section end
534 | # omp section
535 | if OMP_SECTIONS_ID == 1:
536 | print 'section 1 from ' + str(omp.get_thread_num()) + '\n',
537 | # omp section end
538 | # omp sections end
539 | omp.barrier()
540 | # omp parallel end
541 | omp.parallel_run(_block0,2)
542 | sections_test()
543 | pass
544 | ~~~
545 |
546 | Output comparison:
547 |
548 | Before compilation:
549 |
550 | ~~~
551 | section 0 from 0
552 | section 1 from 0
553 | ~~~
554 |
555 | After compilation:
556 |
557 | ~~~
558 | section 0 from 0
559 | section 1 from 1
560 | ~~~
561 |
562 | #### 3. For + reduction + dynamic
563 |
564 | The for + reduction combination is the most commonly used one and the one that best demonstrates parallel speedup. The key points are how the loop iterations are distributed among the threads (via plist/dlist/prange/drange) and the locking needed for the reduction. It also requires a little cooperation from the variable-replacement strategy: a new temporary variable holds each thread's partial sum/product, and the final merge relies on variable replacement as well.
565 |
566 | Before compilation:
567 |
568 | ~~~python
569 | import omp
570 |
571 | num_step = 1000000
572 | step = 1.0 / num_step
573 |
574 | def calc_pi_for():
575 | ans = 0
576 | # omp parallel num_threads(8) private(i,x)
577 | # omp for reduction(+:ans) schedule(dynamic)
578 | for i in range(num_step):
579 | x = (i + 0.5) * step
580 | ans += 4.0 / (1.0 + x * x)
581 | # omp parallel end
582 | print ans * step
583 |
584 | calc_pi_for()
585 | ~~~
586 |
587 | After compilation:
588 |
589 | ~~~python
590 | import omp
591 | omp.set_num_of_internal_locks(1)
592 | num_step = 1000000
593 | step = 1.0 / num_step
594 | def calc_pi_for():
595 | _dict1={}
596 | _dict1['ans'] = 0
597 | # omp parallel num_threads(8) private(i,x)
598 | def _block0():
599 | # omp for reduction(+:ans) schedule(dynamic)
600 | OMP_REDUCTION_VAR_0_0 = omp.reduction_init('+')
601 | for i in omp.drange(num_step):
602 | x = (i + 0.5) * step
603 | OMP_REDUCTION_VAR_0_0 += 4.0 / (1.0 + x * x)
604 | omp.set_internal_lock(0)
605 | _dict1['ans'] = omp.reduction('+',_dict1['ans'],OMP_REDUCTION_VAR_0_0)
606 | omp.unset_internal_lock(0)
607 | omp.barrier()
608 | # omp parallel end
609 | omp.parallel_run(_block0,8)
610 | print _dict1['ans'] * step
611 | calc_pi_for()
612 | pass
613 | ~~~
614 |
615 | Output comparison:
616 |
617 | Before compilation:
618 |
619 | ~~~
620 | 3.14159265359
621 | ~~~
622 |
623 | After compilation:
624 |
625 | ~~~
626 | 3.14159265359
627 | ~~~
628 |
629 | #### 4. For + critical + nowait
630 |
631 | Although, for the same Pi computation, reduction is preferable to critical because it needs far fewer lock operations (critical locks and unlocks on every single accumulation), this test case exists only to demonstrate what critical does; the focus is correctness, and performance is not considered here.
632 |
633 | Before compilation:
634 |
635 | ~~~python
636 | import omp
637 |
638 | num_step = 1000000
639 | step = 1.0 / num_step
640 |
641 | def calc_pi_critical():
642 | ans = 0
643 | # omp parallel num_threads(8) private(i,x)
644 | # omp for nowait
645 | for i in range(num_step):
646 | x = (i + 0.5) * step
647 | # omp critical
648 | ans += 4.0 / (1.0 + x * x)
649 | # omp critical end
650 | # omp parallel end
651 | print ans * step
652 |
653 | calc_pi_critical()
654 | ~~~
655 |
656 | After compilation:
657 |
658 | ~~~python
659 | import omp
660 | omp.set_num_of_internal_locks(2)
661 | num_step = 1000000
662 | step = 1.0 / num_step
663 | def calc_pi_critical():
664 | _dict1={}
665 | _dict1['ans'] = 0
666 | # omp parallel num_threads(8) private(i,x)
667 | def _block0():
668 | # omp for nowait
669 | for i in omp.prange(num_step):
670 | x = (i + 0.5) * step
671 | # omp critical
672 | omp.set_internal_lock(1)
673 | _dict1['ans'] += 4.0 / (1.0 + x * x)
674 | omp.unset_internal_lock(1)
675 | # omp critical end
676 | # omp parallel end
677 | omp.parallel_run(_block0,8)
678 | print _dict1['ans'] * step
679 | calc_pi_critical()
680 | pass
681 | ~~~
682 |
683 | Output comparison:
684 |
685 | Before compilation:
686 |
687 | ~~~
688 | 3.14159265359
689 | ~~~
690 |
691 | After compilation:
692 |
693 | ~~~
694 | 3.14159265359
695 | ~~~
696 |
697 | #### 5. Barrier
698 |
699 | There is no barrier between printing a and b, so the a and b lines are expected to interleave; there is a barrier before printing c, so the faster threads wait for the slower ones and the c lines are printed consecutively at the end.
700 |
701 | Before compilation:
702 |
703 | ~~~python
704 | import omp
705 | import time
706 | import random
707 |
708 | def barrier_test():
709 | # omp parallel
710 | print str(omp.get_thread_num()) + ' a\n',
711 | time.sleep(random.randrange(3))
712 | print str(omp.get_thread_num()) + ' b\n',
713 | time.sleep(random.randrange(3))
714 | # omp barrier
715 | print str(omp.get_thread_num()) + ' c\n',
716 | # omp parallel end
717 |
718 | barrier_test()
719 | ~~~
720 |
721 | After compilation:
722 |
723 | ~~~python
724 | import omp
725 | omp.set_num_of_internal_locks(0)
726 | import time
727 | import random
728 | def barrier_test():
729 | _dict1={}
730 | # omp parallel
731 | def _block0():
732 | print str(omp.get_thread_num()) + ' a\n',
733 | time.sleep(random.randrange(3))
734 | print str(omp.get_thread_num()) + ' b\n',
735 | time.sleep(random.randrange(3))
736 | # omp barrier
737 | omp.barrier()
738 | print str(omp.get_thread_num()) + ' c\n',
739 | # omp parallel end
740 | omp.parallel_run(_block0,0)
741 | barrier_test()
742 | pass
743 | ~~~
744 |
745 | Output comparison:
746 |
747 | Before compilation:
748 |
749 | ~~~
750 | 0 a
751 | 0 b
752 | 0 c
753 | ~~~
754 |
755 | After compilation:
756 |
757 | ~~~
758 | 0 a
759 | 1 a
760 | 2 a
761 | 2 b
762 | 3 a
763 | 1 b
764 | 0 b
765 | 3 b
766 | 1 c
767 | 0 c
768 | 3 c
769 | 2 c
770 | ~~~
771 |
772 | #### 6. Nowait
773 |
774 | This test is a bit special: the outputs of the two versions, with and without nowait, have to be compared to see the effect of nowait. With nowait, threads that finish early move on to the code after the loop and print done first; without nowait, the faster threads must wait for the slower ones.
775 |
776 | With nowait:
777 |
778 | ~~~python
779 | import omp
780 | import time
781 | import random
782 |
783 | def nowait_test():
784 | # omp parallel num_threads(8) private(i)
785 | # omp for nowait
786 | for i in range(8):
787 | time.sleep(random.randrange(3))
788 | print str(omp.get_thread_num()) + ' thread\n',
789 | print "done"
790 | # omp parallel end
791 |
792 | nowait_test()
793 | ~~~
794 |
795 | Without nowait:
796 |
797 | ~~~python
798 | import omp
799 | import time
800 | import random
801 |
802 | def nowait_test():
803 | # omp parallel num_threads(8) private(i)
804 | # omp for
805 | for i in range(8):
806 | time.sleep(random.randrange(3))
807 | print str(omp.get_thread_num()) + ' thread\n',
808 | print "done"
809 | # omp parallel end
810 |
811 | nowait_test()
812 | ~~~
813 |
814 | Output comparison:
815 |
816 | With nowait:
817 |
818 | ~~~
819 | 0 thread
820 | done
821 | 1 thread
822 | done
823 | 3 thread
824 | done
825 | 5 thread
826 | done
827 | 6 thread
828 | done
829 | 2 thread
830 | done
831 | 7 thread
832 | done
833 | 4 thread
834 | done
835 | ~~~
836 |
837 | Without nowait:
838 |
839 | ~~~
840 | 2 thread
841 | 0 thread
842 | 1 thread
843 | 3 thread
844 | 4 thread
845 | 5 thread
846 | 6 thread
847 | 7 thread
848 | done
849 | done
850 | done
851 | done
852 | done
853 | done
854 | done
855 | done
856 | ~~~
857 |
858 | #### 7. MatrixMultiple
859 |
860 | Besides the unit tests, we also tested ordinary programs; matrix multiplication is one of them, with the following code:
861 |
862 | ~~~python
863 | import omp
864 | import random
865 |
866 | def genMatrix(n):
867 | return [random.randrange(n) for i in range(n*n)]
868 |
869 | def matrixMul(N, a, b):
870 | res = [0 for i in range(N*N)]
871 | # omp parallel num_threads(2) private(n,i,j,tmp,k)
872 | # omp for
873 | for n in range(N*N):
874 | i = n / N
875 | j = n % N
876 | tmp = 0
877 | for k in range(N):
878 | tmp = tmp + a[i*N+k] * b[k*N+j]
879 | res[n] = tmp
880 | # omp parallel end
881 | return res
882 |
883 | n = 3
884 | a = genMatrix(n)
885 | b = genMatrix(n)
886 | print a
887 | print b
888 | print matrixMul(n, a, b)
889 | ~~~
890 |
891 | For larger n, our program still produces the correct result.
892 |
893 | ### The Second Class of Tests
894 |
895 | #### 1. Summation Test Program
896 |
897 | ~~~python
898 | import omp
899 |
900 | def count(n):
901 | s = 0
902 | #omp parallel private(i) num_threads(2)
903 | if omp.get_thread_num()==0:
904 | print 'num_threads =', omp.get_num_threads()
905 | #omp for reduction(+:s)
906 | for i in xrange(n):
907 | s += i
908 | #omp parallel end
909 | return s
910 | print count(500000000)
911 | ~~~
912 |
913 | With n = 500000000, the running times for different values of num\_threads are as follows:
914 |
915 | | Threads | Running time |
916 | | :--: | :----: |
917 | | 1 | 20.10s |
918 | | 4 | 9.17s |
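That is roughly a 2.2× speedup with 4 threads (20.10 s / 9.17 s ≈ 2.19).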
919 |
920 | #### 2. Pi Computation Test Program
921 |
922 | ~~~python
923 | import omp
924 |
925 | num_step = 300000000
926 | step = 1.0 / num_step
927 |
928 | def calc_pi_for():
929 | ans = 0
930 | # omp parallel num_threads(2) private(i,x)
931 | # omp for reduction(+:ans)
932 | for i in xrange(num_step):
933 | x = (i + 0.5) * step
934 | ans += 4.0 / (1.0 + x * x)
935 | # omp parallel end
936 | print "%.10f\n" % (ans * step),
937 |
938 | calc_pi_for()
939 | ~~~
940 |
941 | With num\_step = 300000000, the running time as a function of num\_threads is as follows:
942 |
943 | | Threads | Running time |
944 | | :--: | :----: |
945 | | 1 | 10.57s |
946 | | 4 | 4.99s |
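Again roughly a 2.1× speedup with 4 threads (10.57 s / 4.99 s ≈ 2.12).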
947 |
948 | ## Status and Remaining Issues
949 |
950 | We achieved all the goals set out in our project proposal, but some issues remain:
951 |
952 | - Robustness is limited; non-standard Python code is not handled well
953 |   - e.g. code that uses tabs for indentation
954 | - Limitations in principle
955 |   - The GIL problem is not solved at its root: Jython's generality, third-party library support, and efficiency do not reach CPython's level
956 |
957 | ## Acknowledgements
958 | - Thanks to every team member for their effort and cooperation
959 | - Thanks to the instructor and teaching assistants for their guidance
960 | - Thanks to our fellow students for their suggestions and support
961 |
--------------------------------------------------------------------------------
/adds-out.py:
--------------------------------------------------------------------------------
1 | import omp
2 | omp.set_num_of_internal_locks(1)
3 | def count(n):
4 | _dict1={}
5 | _dict1['n']=n
6 | _dict1['s'] = 0
7 | #omp parallel private(i) num_threads(2)
8 | def _block0():
9 | if omp.get_thread_num()==0:
10 | print 'num_threads =', omp.get_num_threads()
11 | #omp for reduction(+:s)
12 | OMP_REDUCTION_VAR_0_0 = omp.reduction_init('+')
13 | for i in omp.prange(_dict1['n']):
14 | OMP_REDUCTION_VAR_0_0 += i
15 | omp.set_internal_lock(0)
16 | _dict1['s'] = omp.reduction('+',_dict1['s'],OMP_REDUCTION_VAR_0_0)
17 | omp.unset_internal_lock(0)
18 | omp.barrier()
19 | #omp parallel end
20 | omp.parallel_run(_block0,2)
21 | return _dict1['s']
22 | print count(500000000)
23 | pass
24 |
25 |
--------------------------------------------------------------------------------
/adds.py:
--------------------------------------------------------------------------------
1 | import omp
2 |
3 | def count(n):
4 | s = 0
5 | #omp parallel private(i) num_threads(2)
6 | if omp.get_thread_num()==0:
7 | print 'num_threads =', omp.get_num_threads()
8 | #omp for reduction(+:s)
9 | for i in xrange(n):
10 | s += i
11 | #omp parallel end
12 | return s
13 | print count(500000000)
14 |
--------------------------------------------------------------------------------
/cpu_count.txt:
--------------------------------------------------------------------------------
1 | 8
2 |
--------------------------------------------------------------------------------
/lib/Core.cpp:
--------------------------------------------------------------------------------
1 | #include "Core.h"
2 |
3 | #include "assert.h"
4 |
5 | int findVariable(Node_t *P, string name)
6 | {
7 | while (P && P->local.find(name) == P->local.end())
8 | P = P->father;
9 | if (P) return P->nodeId;
10 | return -1;
11 | }
12 |
13 | void changeVariable(Node_t *P)
14 | {
15 | cerr << "changeVariable " << P->name << endl;
16 |
17 | int n = P->logId.size();
18 | int comDep = 0, lamDep = 0;
19 | set<string> lamArg;
20 | stack<int> forLine;
21 | map<string, string> forVar;
22 | int codeL = -1, span = 0;
23 | int ptrBlock = 0;
24 | for (int i = 0; i < n; ++i)
25 | {
26 | string log = Log[P->logId[i]];
27 | string type = parseLog(log, 0);
28 | if (type == "arg" || type == "vararg" || type == "kwarg")
29 | {
30 | if (lamDep)
31 | lamArg.insert(parseLog(log, 1));
32 | else
33 | {
34 | P->local.insert(parseLog(log, 1));
35 | string name = parseLog(log, 1);
36 | string tmpstr = P->space + dictKeyWord + ItoS(P->nodeId) + "['" + name + "']=" + name;
37 | addCode[P->sLine].push_back(make_pair(0, tmpstr));
38 | }
39 | } else
40 | if (type == "global")
41 | {
42 | P->global.insert(parseLog(log, 1));
43 | } else
44 | if (type == "comprehension")
45 | {
46 | if (parseLog(log, 1) == "start")
47 | ++comDep;
48 | else
49 | --comDep;
50 | } else
51 | if (type == "lambda")
52 | {
53 | if (parseLog(log, 1) == "start")
54 | {
55 | ++lamDep;
56 | lamArg.clear();
57 | }
58 | else
59 | --lamDep;
60 | } else
61 | if (type == "for")
62 | {
63 | if (parseLog(log, 1) == "begin")
64 | {
65 | int line = StoI(parseLog(log, 2)) - 1;
66 | forLine.push(line);
67 | map<int, paraFor_t*>::iterator it = line2For.find(line);
68 | if (it != line2For.end())
69 | {
70 | paraFor_t *ptr = it->second;
71 | int NofVar = ptr->varName.size();
72 | for (int j = 0; j < NofVar; ++j)
73 | forVar.insert(make_pair(ptr->varName[j], "OMP_REDUCTION_VAR_" + ItoS(ptr->lockId) + "_" + ItoS(j)));
74 | }
75 | }
76 | else
77 | {
78 | int line = forLine.top();
79 | forLine.pop();
80 | map<int, paraFor_t*>::iterator it = line2For.find(line);
81 | if (it != line2For.end())
82 | {
83 | paraFor_t *ptr = it->second;
84 | int NofVar = ptr->varName.size();
85 | for (int j = 0; j < NofVar; ++j)
86 | {
87 | forVar.erase(ptr->varName[j]);
88 | string rpStr;
89 | if (P->local.find(ptr->varName[j]) != P->local.end())
90 | rpStr = dictKeyWord+ItoS(P->nodeId)+"['"+ptr->varName[j]+"']";
91 | else
92 | {
93 | int id = findVariable(P, ptr->varName[j]);
94 | if (id == -1) continue;
95 | rpStr = dictKeyWord+ItoS(id)+"['"+ptr->varName[j]+"']";
96 | }
97 | ptr->varName[j] = rpStr;
98 | }
99 | }
100 | }
101 | } else
102 | if (type == "variable")
103 | {
104 | int tmpCodeL = StoI(parseLog(log, 1));
105 | if (tmpCodeL != codeL)
106 | {
107 | span = 0;
108 | codeL = tmpCodeL;
109 | while (ptrBlock < NofBlock && codeL > Block[ptrBlock]->tLine)
110 | ++ptrBlock;
111 | }
112 | int col = StoI(parseLog(log, 2));
113 | string name = parseLog(log, 3);
114 | string act = parseLog(log, 4);
115 | if (forVar.find(name) != forVar.end())
116 | {
117 | string rpStr = forVar[name];
118 | Code[codeL].replace(col+span, name.length(), rpStr);
119 | span += rpStr.length() - name.length();
120 | continue;
121 | }
122 | if (P->name == "global") continue;
123 | if (comDep && act == "store") continue;
124 | if (lamDep && lamArg.find(name) != lamArg.end()) continue;
125 | if (P->global.find(name) != P->global.end()) continue;
126 | if (ptrBlock < NofBlock && Block[ptrBlock]->sLine < codeL
127 | && Block[ptrBlock]->privateVar.find(name) != Block[ptrBlock]->privateVar.end()) continue;
128 | if (act == "store" || act == "del")
129 | {
130 | if (act == "store")
131 | P->local.insert(name);
132 | else
133 | P->local.erase(name);
134 | string rpStr = dictKeyWord+ItoS(P->nodeId)+"['"+name+"']";
135 | Code[codeL].replace(col+span, name.length(), rpStr);
136 | span += rpStr.length() - name.length();
137 | } else
138 | if (act == "load")
139 | {
140 | int id = findVariable(P, name);
141 | if (id == -1) continue;
142 | string rpStr = dictKeyWord+ItoS(id)+"['"+name+"']";
143 | Code[codeL].replace(col+span, name.length(), rpStr);
144 | span += rpStr.length() - name.length();
145 | }
146 | }
147 | }
148 | n = P->child.size();
149 | for (int i = 0; i < n; ++i)
150 | changeVariable(P->child[i]);
151 | }
152 |
153 | Node_t *findBelong(Node_t *P, int line)
154 | {
155 | if (line < P->sLine || P->tLine < line)
156 | return NULL;
157 | int n = P->child.size();
158 | for (int i = 0; i < n; ++i)
159 | {
160 | Node_t *ret = findBelong(P->child[i], line);
161 | if (ret) return ret;
162 | }
163 | return P;
164 | }
165 |
166 | void AddThreading(int line, string space, string funName, int numThreads)
167 | {
168 | string tmpstr;
169 | tmpstr = space + "omp.parallel_run(" + funName + "," + ItoS(numThreads) + ")";
170 | addCode[line].push_back(make_pair(0, tmpstr));
171 | }
172 |
173 | void OpenMPforPython()
174 | {
175 | cerr << "paralleling" << endl;
176 | for (int i = 0; i < NofBlock; ++i)
177 | {
178 | string space = "";
179 | int tmp = countNofSpace(Code[Block[i]->sLine+1]);
180 | for (int j = 0; j < tmp; ++j)
181 | space.push_back(' ');
182 |
183 | /* parallel */
184 | cerr << "parallel" << endl;
185 | string tmpstr = space + "def " + blockKeyWord + ItoS(i) + "():";
186 | addCode[Block[i]->sLine].push_back(make_pair(0, tmpstr));
187 | Node_t *father = findBelong(root, Block[i]->sLine);
188 | if (father->name != "global")
189 | for (set<string>::iterator it = father->global.begin(); it != father->global.end(); ++it)
190 | {
191 | tmpstr = space + " " + "global " + *it;
192 | addCode[Block[i]->sLine].push_back(make_pair(0, tmpstr));
193 | }
194 | else
195 | {
196 | // global for global
197 | }
198 | for (int j = Block[i]->sLine+1; j < Block[i]->tLine; ++j)
199 | Code[j] = " " + Code[j];
200 | AddThreading(Block[i]->tLine, space, blockKeyWord+ItoS(i), Block[i]->numThreads);
201 |
202 | /* sections */
203 | int NofSections = Block[i]->sections.size();
204 | for (int id1 = 0; id1 < NofSections; ++id1)
205 | {
206 | int sLine = Block[i]->sections[id1].sLine;
207 | int NofSection = Block[i]->sections[id1].section.size();
208 | tmpstr = space + " for OMP_SECTIONS_ID in omp.prange(" + ItoS(NofSection) + "):";
209 | addCode[sLine].push_back(make_pair(0, tmpstr));
210 | for (int id2 = 0; id2 < NofSection; ++id2)
211 | {
212 | int sLine = Block[i]->sections[id1].section[id2].sLine;
213 | int tLine = Block[i]->sections[id1].section[id2].tLine;
214 | string tmpstr = space + " if OMP_SECTIONS_ID == " + ItoS(id2) + ":";
215 | addCode[sLine].push_back(make_pair(0, tmpstr));
216 | for (int j = sLine+1; j < tLine; ++j)
217 | Code[j] = " " + Code[j];
218 | }
219 | int tLine = Block[i]->sections[id1].tLine;
220 | if (Block[i]->sections[id1].nowait == false)
221 | {
222 | tmpstr = space + " omp.barrier()";
223 | addCode[tLine].push_back(make_pair(1, tmpstr));
224 | }
225 | }
226 |
227 | /* for */
228 | int NofFor = Block[i]->paraFor.size();
229 | for (int id = 0; id < NofFor; ++id)
230 | {
231 | int line = Block[i]->paraFor[id].sLine;
232 | int NofVar = Block[i]->paraFor[id].varName.size();
233 | if (NofVar != 0)
234 | {
235 | for (int j = 0; j < NofVar; ++j)
236 | {
237 | tmpstr = space + " OMP_REDUCTION_VAR_" + ItoS(Block[i]->paraFor[id].lockId) +
238 | "_" + ItoS(j) + " = omp.reduction_init('" + Block[i]->paraFor[id].operCh[j] + "')";
239 | addCode[line-1].push_back(make_pair(0, tmpstr));
240 | }
241 | }
242 | int spos = Code[line].find(" in ") + 4;
243 | int tpos = Code[line].find(":") - 1;
244 | string subline = strip(Code[line].substr(spos, tpos-spos+1));
245 | string str = (Block[i]->paraFor[id].mode == "dynamic")? "omp.drange" : "omp.prange";
246 | if (begin_with(subline, "xrange("))
247 | Code[line].replace(Code[line].find("xrange"), 6, str);
248 | else if (begin_with(subline, "range("))
249 | Code[line].replace(Code[line].find("range"), 5, str);
250 | else
251 | {
252 | str = (Block[i]->paraFor[id].mode == "dynamic")? "omp.dlist(" : "omp.plist(";
253 | Code[line].insert(tpos+1, ")");
254 | Code[line].insert(spos, str);
255 | }
256 | int tLine = Block[i]->paraFor[id].tLine;
257 | if (NofVar != 0)
258 | {
259 | for (int j = 0; j < NofVar; ++j)
260 | {
261 | tmpstr = space + " omp.set_internal_lock(" + ItoS(Block[i]->paraFor[id].lockId) + ")";
262 | addCode[tLine].push_back(make_pair(0, tmpstr));
263 | tmpstr = space + " " + Block[i]->paraFor[id].varName[j] +
264 | " = omp.reduction('" + Block[i]->paraFor[id].operCh[j] + "'," +
265 | Block[i]->paraFor[id].varName[j] + "," + "OMP_REDUCTION_VAR_" +
266 | ItoS(Block[i]->paraFor[id].lockId) + "_" + ItoS(j) + ")";
267 | addCode[tLine].push_back(make_pair(1, tmpstr));
268 | tmpstr = space + " omp.unset_internal_lock(" + ItoS(Block[i]->paraFor[id].lockId) + ")";
269 | addCode[tLine].push_back(make_pair(2, tmpstr));
270 | }
271 | }
272 | if (Block[i]->paraFor[id].nowait == false)
273 | {
274 | tmpstr = space + " omp.barrier()";
275 | addCode[tLine].push_back(make_pair(3, tmpstr));
276 | }
277 | }
278 |
279 | /* barrier */
280 | int NofBarrier = Block[i]->barrier.size();
281 | for (int id = 0; id < NofBarrier; ++id)
282 | {
283 | int line = Block[i]->barrier[id];
284 | tmpstr = space + " omp.barrier()";
285 | addCode[line].push_back(make_pair(0, tmpstr));
286 | }
287 |
288 | /* critical */
289 | int NofCritical = Block[i]->critical.size();
290 | for (int id = 0; id < NofCritical; ++id)
291 | {
292 | int sLine = Block[i]->critical[id].sLine;
293 | int tLine = Block[i]->critical[id].tLine;
294 | tmpstr = space + " omp.set_internal_lock(" + ItoS(Block[i]->critical[id].lockId) + ")";
295 | addCode[sLine].push_back(make_pair(0, tmpstr));
296 | tmpstr = space + " omp.unset_internal_lock(" + ItoS(Block[i]->critical[id].lockId) + ")";
297 | addCode[tLine-1].push_back(make_pair(0, tmpstr));
298 | }
299 | }
300 | }
--------------------------------------------------------------------------------
/lib/Core.h:
--------------------------------------------------------------------------------
1 | #ifndef CORE_H
2 | #define CORE_H
3 |
4 | #include
5 | #include