├── latex ├── after_body.tex ├── before_body.tex ├── preamble.tex └── template.tex ├── materials ├── chapter10 │ ├── atomic.c │ ├── barrier.c │ ├── master.c │ ├── critical.c │ ├── threadprivate.c │ ├── dataprallel.c │ ├── tra_SIMD.c │ ├── taskparllel.c │ ├── single.c │ ├── loongsonSIMD.S │ ├── for.c │ ├── OpenMP_struct.c │ ├── paralle.c │ ├── test4.c │ ├── SIMD.S │ ├── sections.c │ ├── MPI.c │ ├── parallel_for.c │ ├── pi_C.c │ ├── parallel_sections.c │ ├── pi_OpenMP.c │ ├── maritx_OpenMP.c │ ├── pi_MPI.c │ ├── Makefile │ ├── Pthreads.c │ ├── pi_Pthreads.c │ └── martix_MPI.c ├── chapter2 │ ├── alpha_note.txt │ ├── ppc_note.txt │ ├── if_else.c │ ├── if_else.S │ ├── ppc.S │ ├── vax_addressing.csv │ ├── add_and_ref.c │ ├── isatype.csv │ ├── mem_inst.csv │ ├── addr_compare.csv │ ├── alpha.S │ ├── control_inst.csv │ ├── int_type.csv │ ├── regnum.csv │ ├── switch_case.c │ ├── add_and_ref.S │ ├── loop.c │ ├── switch_case_chain.S │ ├── loop.S │ ├── addressing.csv │ ├── switch_case.S │ └── alu_inst.csv ├── chapter8 │ ├── vertical_calculation.csv │ ├── boolean.csv │ ├── booth_one_rule.csv │ ├── fulladder_truetable.csv │ ├── 8-1selector_true_table.csv │ ├── booth_two_rule.csv │ ├── 3-8decoder_true_table.csv │ └── IEEE754float.csv ├── chapter6 │ ├── pcie_signals.csv │ ├── ht_response.csv │ ├── ht_packet_format.csv │ ├── ht_signals.csv │ ├── ht_request.csv │ ├── ddr3_udimm.csv │ └── axi.csv ├── chapter4 │ ├── fun.c.png │ ├── t.S │ ├── fun_la.S.png │ ├── normal.S.png │ ├── normal.c.png │ ├── simple.S.png │ ├── simple.c.png │ ├── varg.c.png │ ├── dynamic.S.png │ ├── dynamic.c.png │ ├── fun_mips.S.png │ ├── simple_nofp.S.png │ ├── syscall_write.S.png │ ├── keyboard_interrupt.txt.png │ ├── simple.c │ ├── t.c │ ├── normal.c │ ├── simple_nofp.S │ ├── varg.c │ ├── context_switch.csv │ ├── fun.c │ ├── varg_passing.csv │ ├── dynamic.c │ ├── la_reg.csv │ ├── syscall.csv │ ├── simple.S │ ├── fun_la.S │ ├── mips_reg.csv │ ├── syscall_write.S │ ├── fun_mips.S │ ├── normal.S │ ├── Makefile │ 
├── dynamic.S │ └── keyboard_interrupt.txt ├── chapter7 │ ├── space_requirement.csv │ ├── space_allocation.csv │ ├── reg_multiplex.csv │ └── serial_status.csv ├── chapter12 │ ├── stream-bandwidth.csv │ ├── memory-latency.csv │ ├── STREAM.csv │ ├── perf-eval-class.csv │ ├── UnixBench.csv │ ├── performance-formula.csv │ ├── perf-tools.csv │ ├── PARSEC.csv │ ├── alu-delay.csv │ ├── cpu-params.csv │ ├── perf-event-3A5000.csv │ ├── perf-event-nehalem.csv │ ├── 2006brpercent.csv │ ├── LMbench.csv │ ├── AMDTest.csv │ ├── SPEC2017.csv │ ├── 2006brpred.csv │ ├── 2006rate4.csv │ ├── 2006ipc.csv │ ├── 2006speed.csv │ ├── 2006speed-nopara.csv │ ├── 2006brbandwidth.csv │ ├── 2006insts.csv │ ├── SPEC2006.csv │ └── SPEC2000.csv ├── chapter5 │ ├── pio_vs_dma.csv │ └── alu.csv ├── chapter11 │ └── cache_parameter.csv ├── chapter1 │ ├── program_and_data.csv │ ├── flops_bandwidth.csv │ └── spec_cpu2000.csv └── chapter3 │ ├── exception.csv │ └── csr.csv ├── images ├── by-nc.png ├── chapter1 │ ├── 1-1.eps │ ├── power.png │ ├── hierarchy.png │ ├── ic_develop.png │ ├── china_design.png │ └── device_to_chip.png ├── chapter2 │ ├── csr.png │ ├── lwl.png │ ├── page.png │ ├── seg-page.png │ ├── segment.png │ ├── hierarchy.png │ ├── inst_coding.png │ ├── isa-compare.png │ └── loongarch-coding.png ├── chapter3 │ ├── csr.png │ ├── crmd.png │ ├── tlb_reg.png │ ├── page_table.png │ ├── tlb_convert.png │ ├── tlb_entry.png │ └── memcpy_program.png ├── chapter5 │ ├── soc.png │ ├── bht_loop.png │ ├── dram_cell.png │ ├── 3A3000_display.png │ ├── 3A3000_display1.png │ ├── 3A3000_display2.png │ ├── 3A3000_display3.png │ ├── 3A3000_display4.png │ ├── disk_structure.png │ ├── loongson_3A3000.png │ ├── sdram_structure.png │ ├── structure_2part.png │ ├── structure_3part.png │ ├── structure_4part.png │ ├── von_architecture.png │ ├── storage_hierarchy.png │ └── structure_3part_weaknb.png ├── chapter6 │ ├── axi.png │ ├── ddr2.png │ ├── ddr3.png │ ├── ahb_apb.png │ ├── jz_m200.png │ ├── burst_read.png │ ├── 
ddr2_state.png │ ├── ddr3_read.png │ ├── ddr3_write.png │ ├── ht_transfer.png │ ├── pci_signals.png │ ├── pcie_packet.png │ ├── command_after.png │ ├── ht_two_chips.png │ ├── loongson_4way.png │ ├── pcie_location.png │ ├── sdram_timing.png │ ├── axi_interconnect.png │ ├── command_before.png │ ├── ht_interconnect.png │ ├── overlapped_read.png │ ├── pci_interconnect.png │ ├── read_structure.png │ ├── write_structure.png │ ├── pcie_interconnect.png │ └── write_transaction.png ├── chapter9 │ ├── BTB.png │ ├── PHT.png │ ├── raw.png │ ├── cache.emf │ ├── cache.png │ ├── decode.png │ ├── LS3A2000.emf │ ├── LS3A3000.png │ ├── cacheMap.png │ ├── dualIssue.ai │ ├── dynamic.emf │ ├── dynamic.png │ ├── dynamic1.png │ ├── ctrlHazard.png │ ├── dualIssue.png │ ├── forwarding.png │ ├── multicycle.png │ ├── stallflow.png │ ├── brachRelation.ai │ ├── brachRelation.png │ ├── componentflow.emf │ ├── componentflow.png │ ├── pipelineflow.png │ ├── cacheMapStruct.png │ ├── ctrlHazardFlow.png │ ├── ctrlHazardFlow1.png │ ├── ctrlHazardStruct.png │ ├── datapathWithClk.png │ ├── multicycleflow.emf │ ├── multicycleflow.png │ ├── pipelinestruct.emf │ ├── pipelinestruct.png │ ├── instHazardPipeline.png │ ├── simpleCPUdatapath.emf │ ├── simpleCPUdatapath.png │ └── componentflowWithStall.png ├── chapter10 │ ├── 线程同步.ai │ ├── 线程同步.png │ ├── 线程管理.ai │ ├── 编译制导语言.ai │ ├── 矩阵乘法算法示意.ai │ ├── 线程管理-01.png │ ├── MPI的6个基本的函数.ai │ ├── SISD_SIMD.png │ ├── 积分求圆周率算法示意.ai │ ├── 编译制导语言-01.png │ ├── shared_task.png │ ├── 矩阵乘法算法示意-01.png │ ├── MPI的6个基本的函数-01.png │ ├── 积分求圆周率算法示意-01.png │ └── Shared_storage_and_message_passing_programming.png ├── chapter11 │ ├── 11-16.png │ ├── 11-17.png │ ├── 11-18.png │ ├── 11-24.png │ ├── 11-3.png │ ├── topo.png │ ├── cell_arch.png │ ├── cuda_core.png │ ├── dir_invld.png │ ├── shared_llc.png │ ├── sm_single.png │ ├── sm_whole.png │ ├── esi_transit.png │ ├── interconnect.png │ ├── la464_uarch.png │ ├── ll_sc_atomic.png │ ├── noc_example.png │ ├── test_and_set.png │ ├── 
tile64_arch.png │ ├── cache_structure.png │ ├── ls3a5000_arch.png │ ├── ls3a5000_layout.png │ ├── router_struct.png │ ├── different_results.png │ ├── nuca_interconnect.png │ ├── sandybridge_arch.png │ ├── centralized_barrier.png │ ├── fermi_mem_hierarchy.png │ └── flow_control_method.png ├── chapter8 │ ├── 8-19A.png │ ├── 8-21a.png │ ├── 8-22a.png │ ├── hwCMOS.png │ ├── 4bit_CLA.png │ ├── D_latch.png │ ├── RS_latch.png │ ├── 16bit_CLA.png │ ├── 32bit_CLA.png │ ├── 32bit_RCA.png │ ├── MOS_switch.png │ ├── logic_gate.png │ ├── subtracter.png │ ├── 4bit_comparer.png │ ├── 4bit_shifter.png │ ├── CMOS_NOR_gate.png │ ├── CMOS_NOT_gate.png │ ├── Dlatch_timing.png │ ├── IEEE754_float.png │ ├── MOS_structure.png │ ├── clock_signal.png │ ├── hw logic gate.png │ ├── 1bit_full_adder.png │ ├── CMOS_D_Flip-Flop.png │ ├── CMOS_NAND_gate.png │ ├── CMOS_TRANS_gate.png │ ├── Booth_select_logic.png │ ├── Booth_partial_product.png │ ├── CMOS_inverter_delay.png │ ├── NMOS_workingprinciple.png │ ├── 16bit_multiplying_unit.png │ ├── 1bit_full_adder_circuit.png │ ├── 1bit_wallace_tree_for8.png │ ├── Booth_selectsignal_logic.png │ ├── Booth_two_multiplication.png │ ├── booth_one_multiplication.png │ ├── complement_multiplication.png │ ├── sicicium_atomic_structure.png │ ├── 4bit_CLA(include carry factor).png │ ├── FULL_adder for four 4bit number.png │ ├── WORONG_exaxple of walllace tree.png │ ├── Booth_multiplication with shifter.png │ ├── FULL_adder for three 4bit number.png │ ├── iterative_complement_multipilier.png │ └── iterative_sourcecode_multipilier.png ├── chapter7 │ ├── bar_reg.png │ ├── boot_flow.png │ ├── config_reg.png │ ├── l1_dcache.png │ └── bus_access_type.png ├── chapter12 │ ├── Perf的工作原理图.png │ ├── 访存操作的并发性.png │ ├── 功能部件操作延迟-01.png │ ├── 微基准测试程序集-01.png │ ├── Nehalem平台中显示的perf list输出-01.png │ ├── 3A5000和对比处理器的各级Cache和内存访问延迟数据-01.png │ ├── 基于跳步访问的3A5000和Zen1、Skylake各级延迟的比较(cycles).png │ └── 基于随机访问的3A5000和Zen1、Skylake各级延迟的比较(cycles).png ├── chapter4 │ ├── as_example.png │ 
├── stack_frame.png │ └── address_space.png └── foreword │ └── liguojie_sign.png ├── word └── template.docx ├── 40-references.Rmd ├── .gitignore ├── _bookdown.yml ├── multi_column.template ├── Makefile ├── 50-resources.Rmd ├── css └── style.css ├── .github └── workflows │ └── deploy_bookdown.yml ├── 03-foreword-3rd.Rmd ├── _output.yml ├── 05-online-version.Rmd ├── docker └── Dockerfile ├── 02-foreword-author.Rmd ├── 04-preface.Rmd ├── 01-foreword-recommend.Rmd ├── README.md ├── index.Rmd ├── chinese-gb7714-2005-numeric.csl ├── 30-conclusion.Rmd ├── renv.lock └── LICENSE /latex/after_body.tex: -------------------------------------------------------------------------------- 1 | \backmatter 2 | \printindex 3 | -------------------------------------------------------------------------------- /materials/chapter10/atomic.c: -------------------------------------------------------------------------------- 1 | #pragma omp atomic newline -------------------------------------------------------------------------------- /materials/chapter10/barrier.c: -------------------------------------------------------------------------------- 1 | #pragma omp barrier newline -------------------------------------------------------------------------------- /materials/chapter10/master.c: -------------------------------------------------------------------------------- 1 | #pragma omp master newline -------------------------------------------------------------------------------- /materials/chapter10/critical.c: -------------------------------------------------------------------------------- 1 | #pragma omp critical[name] newline -------------------------------------------------------------------------------- /materials/chapter10/threadprivate.c: -------------------------------------------------------------------------------- 1 | #pragma omp threadprivate(list) -------------------------------------------------------------------------------- /images/by-nc.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/by-nc.png -------------------------------------------------------------------------------- /word/template.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/word/template.docx -------------------------------------------------------------------------------- /materials/chapter10/dataprallel.c: -------------------------------------------------------------------------------- 1 | for(i=0,i 5 | 6 | \newpage 7 | -------------------------------------------------------------------------------- /images/chapter11/centralized_barrier.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter11/centralized_barrier.png -------------------------------------------------------------------------------- /images/chapter11/fermi_mem_hierarchy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter11/fermi_mem_hierarchy.png -------------------------------------------------------------------------------- /images/chapter11/flow_control_method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter11/flow_control_method.png -------------------------------------------------------------------------------- /images/chapter8/Booth_partial_product.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/Booth_partial_product.png -------------------------------------------------------------------------------- /images/chapter8/CMOS_inverter_delay.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/CMOS_inverter_delay.png -------------------------------------------------------------------------------- /images/chapter8/NMOS_workingprinciple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/NMOS_workingprinciple.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | _book 6 | _bookdown_files 7 | project.vim 8 | materials/*/*.png 9 | -------------------------------------------------------------------------------- /images/chapter5/structure_3part_weaknb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter5/structure_3part_weaknb.png -------------------------------------------------------------------------------- /images/chapter8/16bit_multiplying_unit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/16bit_multiplying_unit.png -------------------------------------------------------------------------------- /images/chapter8/1bit_full_adder_circuit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/1bit_full_adder_circuit.png -------------------------------------------------------------------------------- /images/chapter8/1bit_wallace_tree_for8.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/1bit_wallace_tree_for8.png -------------------------------------------------------------------------------- /images/chapter9/componentflowWithStall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter9/componentflowWithStall.png -------------------------------------------------------------------------------- /materials/chapter10/single.c: -------------------------------------------------------------------------------- 1 | #pragma omp single [private(list)|firstprivate(list)| copyprivate(list)|nowait] newline 2 | Structured_block -------------------------------------------------------------------------------- /images/chapter8/Booth_selectsignal_logic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/Booth_selectsignal_logic.png -------------------------------------------------------------------------------- /images/chapter8/Booth_two_multiplication.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/Booth_two_multiplication.png -------------------------------------------------------------------------------- /images/chapter8/booth_one_multiplication.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/booth_one_multiplication.png -------------------------------------------------------------------------------- /images/chapter8/complement_multiplication.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/complement_multiplication.png 
-------------------------------------------------------------------------------- /images/chapter8/sicicium_atomic_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/sicicium_atomic_structure.png -------------------------------------------------------------------------------- /latex/before_body.tex: -------------------------------------------------------------------------------- 1 | 2 | \thispagestyle{empty} 3 | 4 | \setlength{\abovedisplayskip}{-5pt} 5 | \setlength{\abovedisplayshortskip}{-5pt} 6 | -------------------------------------------------------------------------------- /materials/chapter4/keyboard_interrupt.txt.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/materials/chapter4/keyboard_interrupt.txt.png -------------------------------------------------------------------------------- /materials/chapter8/booth_one_rule.csv: -------------------------------------------------------------------------------- 1 | y~i~,y~i-1~,操作 2 | 0,0,不需要加(+0) 3 | 0,1,补码加X(+[X]~补~) 4 | 1,0,补码减X(-[X]~补~) 5 | 1,1,不需要加(+0) 6 | -------------------------------------------------------------------------------- /images/chapter12/Nehalem平台中显示的perf list输出-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter12/Nehalem平台中显示的perf list输出-01.png -------------------------------------------------------------------------------- /materials/chapter4/simple.c: -------------------------------------------------------------------------------- 1 | int simple(int a, int b) { 2 | return ((a&0xff)+b); 3 | } 4 | -------------------------------------------------------------------------------- /images/chapter8/4bit_CLA(include carry factor).png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/4bit_CLA(include carry factor).png -------------------------------------------------------------------------------- /images/chapter8/FULL_adder for four 4bit number.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/FULL_adder for four 4bit number.png -------------------------------------------------------------------------------- /images/chapter8/WORONG_exaxple of walllace tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/WORONG_exaxple of walllace tree.png -------------------------------------------------------------------------------- /materials/chapter2/if_else.S: -------------------------------------------------------------------------------- 1 | move $t0, cond_exp 2 | beqz $t0, .L1 3 | 4 | b .L2 5 | .L1: 6 | 7 | .L2: 8 | 9 | -------------------------------------------------------------------------------- /images/chapter12/3A5000和对比处理器的各级Cache和内存访问延迟数据-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter12/3A5000和对比处理器的各级Cache和内存访问延迟数据-01.png -------------------------------------------------------------------------------- /images/chapter8/Booth_multiplication with shifter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/Booth_multiplication with shifter.png -------------------------------------------------------------------------------- /images/chapter8/FULL_adder for three 4bit number.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/FULL_adder for three 4bit number.png -------------------------------------------------------------------------------- /images/chapter8/iterative_complement_multipilier.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/iterative_complement_multipilier.png -------------------------------------------------------------------------------- /images/chapter8/iterative_sourcecode_multipilier.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter8/iterative_sourcecode_multipilier.png -------------------------------------------------------------------------------- /materials/chapter7/space_requirement.csv: -------------------------------------------------------------------------------- 1 | 设备号,名称,BAR号,大小 2 | 1,USB控制器,0,4KB 3 | 2,显示控制器,0,128MB 4 | 2,显示控制器,1,64KB 5 | 3,网络控制器,0,4KB 6 | 3,网络控制器,1,16KB 7 | -------------------------------------------------------------------------------- /materials/chapter4/t.c: -------------------------------------------------------------------------------- 1 | 2 | IIIIIIIIII 3 | HHHHHHHHHH 4 | AAAAATTTTT 5 | ATATATATAT 6 | -------------------------------------------------------------------------------- /materials/chapter10/loongsonSIMD.S: -------------------------------------------------------------------------------- 1 | gsldxc1 $f0, 0x0($src0, $0) 2 | gsldxc1 $f2, 0x0($src1, $0) 3 | paddb $f0, $f0, $f2 4 | gssdxc1 $f0, 0x0($result, $0) -------------------------------------------------------------------------------- /materials/chapter2/ppc.S: -------------------------------------------------------------------------------- 1 | LOOP: 2 | LFU fp0=y(r4=r4+8) 3 | FMUL fp0=fp0,fp1 4 | LF fp2=x(r3,8) 5 | FMADD fp0=fp0,fp2,fp3 6 | STFU x(r3=r3+8)=fp0 7 | BC LOOP,CTR>0 8 | 
-------------------------------------------------------------------------------- /images/chapter12/基于跳步访问的3A5000和Zen1、Skylake各级延迟的比较(cycles).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter12/基于跳步访问的3A5000和Zen1、Skylake各级延迟的比较(cycles).png -------------------------------------------------------------------------------- /images/chapter12/基于随机访问的3A5000和Zen1、Skylake各级延迟的比较(cycles).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter12/基于随机访问的3A5000和Zen1、Skylake各级延迟的比较(cycles).png -------------------------------------------------------------------------------- /materials/chapter2/vax_addressing.csv: -------------------------------------------------------------------------------- 1 | 寻址方式,tex,spice,gcc 2 | 偏移量寻址,32%,55%,40% 3 | 立即数寻址,43%,17%,39% 4 | 寄存器间接寻址,24%,3%,11% 5 | 自增量寻址,0%,16%,6% 6 | 存储器间接寻址,1%,6%,1% 7 | -------------------------------------------------------------------------------- /images/chapter10/Shared_storage_and_message_passing_programming.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/foxsen/archbase/main/images/chapter10/Shared_storage_and_message_passing_programming.png -------------------------------------------------------------------------------- /materials/chapter4/normal.c: -------------------------------------------------------------------------------- 1 | extern int nested(int a, int b, int c, int d, int e, int f, int g, int h, int i); 2 | int normal(void){ 3 | return nested(1, 2, 3, 4, 5, 6, 7, 8, 9); 4 | } 5 | -------------------------------------------------------------------------------- /materials/chapter4/simple_nofp.S: -------------------------------------------------------------------------------- 1 | simple: 2 | bstrpick.w $a0,$a0,7,0 3 | add.w $a0,$a0,$a1 4 | 
jr $ra 5 | -------------------------------------------------------------------------------- /materials/chapter8/fulladder_truetable.csv: -------------------------------------------------------------------------------- 1 | A,B,C~in~,S,C~out~ 2 | 0,0,0,0,0 3 | 0,0,1,1,0 4 | 0,1,0,1,0 5 | 0,1,1,0,1 6 | 1,0,0,1,0 7 | 1,0,1,0,1 8 | 1,1,0,0,1 9 | 1,1,1,1,1 10 | -------------------------------------------------------------------------------- /materials/chapter10/for.c: -------------------------------------------------------------------------------- 1 | #pragma omp for [private(list)| firstprivate(list)|lastprivate(list)| \ 2 | reduction(reduction-identifier:list)|schedule(kind[,chunk_size])|collapse(n)|ordered| nowait] newline 3 | -------------------------------------------------------------------------------- /materials/chapter2/add_and_ref.c: -------------------------------------------------------------------------------- 1 | int add(int a,int b) 2 | { 3 | return a+b; 4 | } 5 | 6 | int ref(void) 7 | { 8 | 9 | int t1 = 12; 10 | int t2 = 34; 11 | 12 | return add(t1,t2); 13 | } 14 | -------------------------------------------------------------------------------- /materials/chapter6/ht_response.csv: -------------------------------------------------------------------------------- 1 | 字节\数据位,7,6,5,4,3,2,1,0 2 | 0,命令相关内容,命令相关内容,,命令,命令,命令,命令,命令 3 | 1,,,,设备标识,设备标识,设备标识,设备标识,设备标识 4 | 2,,,错误,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容 5 | 3,,,错误,,,,命令相关内容,命令相关内容 6 | -------------------------------------------------------------------------------- /materials/chapter12/stream-bandwidth.csv: -------------------------------------------------------------------------------- 1 | STREAM四核(openMP),3A5000,Zen1,Skylake 2 | Copy,23860.9,39896.4,26983.3 3 | Scale,22347.3,25073.3,19110.9 4 | Add,19323,29768.5,21516 5 | Triad,21043.8,29146.8,21490.7 6 | -------------------------------------------------------------------------------- /materials/chapter2/isatype.csv: 
-------------------------------------------------------------------------------- 1 | 堆栈型,累加器型,寄存器-存储器型,寄存器-寄存器型 2 | PUSH A,LOAD A,"LOAD R1,A","LOAD R1,A" 3 | PUSH B,ADD B,"ADD R1,B","LOAD R2,B" 4 | ADD,STORE C,"STORE C,R1","ADD R3,R1,R2" 5 | POP C,,,"STORE C,R3" 6 | -------------------------------------------------------------------------------- /materials/chapter8/8-1selector_true_table.csv: -------------------------------------------------------------------------------- 1 | 输入1,输入2,输入3,输出 2 | C,B,A,Y 3 | 0,0,0,D~0~ 4 | 0,0,1,D~1~ 5 | 0,1,0,D~2~ 6 | 0,1,1,D~3~ 7 | 1,0,0,D~4~ 8 | 1,0,1,D~5~ 9 | 1,1,0,D~6~ 10 | 1,1,1,D~7~ 11 | -------------------------------------------------------------------------------- /materials/chapter10/OpenMP_struct.c: -------------------------------------------------------------------------------- 1 | #include 2 | main(){ 3 | int var1,var2,var3; 4 | … 5 | #pragma omp parallel private(var1,var2) shared(var3) 6 | { 7 | … 8 | } 9 | … 10 | } 11 | -------------------------------------------------------------------------------- /materials/chapter2/mem_inst.csv: -------------------------------------------------------------------------------- 1 | 指令,指令功能 2 | LD.B,取字节 3 | LD.BU,取字节,无符号扩展 4 | LD.H,取半字 5 | LD.HU,取半字,无符号扩展 6 | LD.W,取字 7 | LD.WU,取字,无符号扩展 8 | LD.D,取双字 9 | ST.B,存字节 10 | ST.H,存半字 11 | ST.W,存字 12 | ST.D,存双字 13 | -------------------------------------------------------------------------------- /materials/chapter2/addr_compare.csv: -------------------------------------------------------------------------------- 1 | 寻址方式,MIPS,PowerPC,PA-RISC,SPARC,LoongArch 2 | 寄存器寻址,Y,Y,Y,Y,Y 3 | 立即数寻址,Y,Y,Y,Y,Y 4 | 偏移量寻址,Y,Y,Y,Y,Y 5 | 变址寻址,Y(仅浮点),Y,Y,Y,Y 6 | 比例变址寻址,,,Y,, 7 | 自增/自减+偏移量寻址,,Y,Y,, 8 | 自增/自减+变址寻址,,Y,Y,, 9 | -------------------------------------------------------------------------------- /materials/chapter2/alpha.S: -------------------------------------------------------------------------------- 1 | LOOP: 2 | LDT fp3=y(r2,0) 3 | LDT fp1=x(r1,0) 4 | 
MULT fp3=fp3,fp2 5 | ADDQ r2=r2,8 6 | MULT fp1=fp1,fp4 7 | SUBQ r4=r2,r6 8 | ADDT fp1=fp3,fp1 9 | STT x(r1,0)=fp1 10 | ADDQ r1=r1,8 11 | BNE r4,LOOP 12 | -------------------------------------------------------------------------------- /materials/chapter10/paralle.c: -------------------------------------------------------------------------------- 1 | #pragma omp parallel [if(scalar_expression)| num_threads(integer-expression)|default(shared|none)| private(list)| \ 2 | firstprivate(list)|shared(list)| copyin(list) |reduction(operator:list)| proc_bind(master|close|spread)] newline -------------------------------------------------------------------------------- /materials/chapter10/test4.c: -------------------------------------------------------------------------------- 1 | If(mypid==0) { 2 | MPI_Bcast(buf0,count,type,0,comm,ierr); 3 | MPI_Send(buf1,count,type,1,tag,comm,ierr); 4 | } else { 5 | MPI_Recv(buf1,count,type,0,tag,comm,ierr); 6 | MPI_Bcast(buf0,count,type,0,comm,ierr); 7 | } 8 | -------------------------------------------------------------------------------- /materials/chapter12/memory-latency.csv: -------------------------------------------------------------------------------- 1 | CPU型号,"3A5000 2.5G","Zen1 r3 1200","Zen+ r3 3100","Skylake i3 9100f" 2 | 一级Cache延迟,4拍,4拍,4拍,4拍 3 | 二级Cache延迟,14 拍,17拍,12拍,12拍 4 | 三级Cache延迟,38~45 拍,38~49拍,38~45拍,38~48拍 5 | 内存访问延迟,40拍+80ns,40拍+85ns,40拍+75ns,40拍+68ns 6 | -------------------------------------------------------------------------------- /_bookdown.yml: -------------------------------------------------------------------------------- 1 | book_filename: bookdown 2 | clean: [bookdown.bbl] 3 | delete_merged_file: true 4 | language: 5 | label: 6 | fig: "图 " 7 | tab: "表 " 8 | ui: 9 | edit: "编辑" 10 | chapter_name: ["第 ", " 章"] 11 | part_name: ["第 ", " 部分"] 12 | -------------------------------------------------------------------------------- /materials/chapter4/varg.c: 
-------------------------------------------------------------------------------- 1 | struct Ss { 2 | char c1, c2; 3 | } a3 = {3, 4}; 4 | int fun (double a1, ...); 5 | int test () { 6 | return fun (1, (float) 2, a3, (long double) 5, (float) 6, 7 | (short) 7, (int) 8, (float) 9, (int)10); 8 | } 9 | -------------------------------------------------------------------------------- /materials/chapter4/context_switch.csv: -------------------------------------------------------------------------------- 1 | 场景,上下文切换时保存和恢复的内容 2 | 函数调用,部分寄存器(包括栈帧相关的$sp,$fp)、返回地址 3 | 中断和异常,(通常情况)全部定点寄存器、异常现场信息、异常相关信息 4 | 系统调用,部分定点寄存器(包括栈帧相关寄存器)、异常现场信息 5 | 线程,全部用户态寄存器、TLS、当前PC等相关信息 6 | 进程,全部用户态寄存器、页表基址等控制寄存器、当前PC等相关信息 7 | 虚拟机,虚拟CPU状态(寄存器、必要的特权资源等) 8 | -------------------------------------------------------------------------------- /materials/chapter5/pio_vs_dma.csv: -------------------------------------------------------------------------------- 1 | PIO方式,DMA方式 2 | 键盘输入,网卡收包 3 | 敲击键盘,接收端收到网络包 4 | 键盘输入被记录在PS/2控制器内,网卡将收到的网络包写入内存中预先分配好的内存中 5 | PS/2控制器向处理器发送中断,网卡向处理器发送中断 6 | CPU查询中断源,发现键盘中断,CPU查询中断源,发现网卡接收中断 7 | CPU从PS/2控制器内读回键盘值,CPU从内存中读到网络包,并进行处理,初始化新的接收缓冲供网卡使用 8 | CPU清中断,CPU清中断 9 | -------------------------------------------------------------------------------- /materials/chapter8/booth_two_rule.csv: -------------------------------------------------------------------------------- 1 | y~i+1~,y~i~,y~i-1~,操作 2 | 0,0,0,不需要加(+0) 3 | 0,0,1,补码加X(+[X]~补~) 4 | 0,1,0,补码加X(+[X]~补~) 5 | 0,1,1,补码加2X(+[X]~补~左移) 6 | 1,0,0,补码减2X(-[X]~补~左移) 7 | 1,0,1,补码减X(-[X]~补~) 8 | 1,1,0,补码减X(-[X]~补~) 9 | 1,1,1,不需要加(+0) 10 | -------------------------------------------------------------------------------- /materials/chapter10/SIMD.S: -------------------------------------------------------------------------------- 1 | li $4, 0x0 2 | li $5, 0x8 3 | daddu $src0, $4 4 | daddu $src1, $4 5 | daddu $result, $4 6 | lb $6, 0x0($src0) 7 | lb $7, 0x0($src1) 8 | daddu $6, $6, $7 9 | sb $6, 0x0($result) 10 | daddiu $4, 0x1 11 | blt $4, $5, 
1b 12 | nop -------------------------------------------------------------------------------- /materials/chapter11/cache_parameter.csv: -------------------------------------------------------------------------------- 1 | ⠀,IBM Power8,Intel Haswell,Oracle SPARC T5,龙芯3A5000 2 | 每芯片核数,12,4,16,4 3 | 每核线程数,8,2,8,1 4 | 每核一级指令Cache,32KB,32KB,16KB,64KB 5 | 每核一级数据Cache,64KB,32KB,16KB,64KB 6 | 每核二级Cache,512KB,256KB,128 KB,256KB 7 | 片上共享LLC,96MB,8MB,8MB,16MB 8 | -------------------------------------------------------------------------------- /materials/chapter12/STREAM.csv: -------------------------------------------------------------------------------- 1 | opname,operation,description 2 | Copy,a(i)=b(i),2个双精度浮点的访存操作(16字节),每个迭代没有浮点操作 3 | Scale,a(i)=q*b(i),2个双精度浮点的访存操作(16字节),每个迭代包含一个浮点乘法操作 4 | Add,a(i)=b(i)+c(i),3个双精度浮点的访存操作(24字节),每次迭代包含一个浮点加法操作 5 | Triad,a(i)=b(i)+q*c(i),3个双精度浮点的访存操作(24字节),每次迭代两个浮点操作 6 | -------------------------------------------------------------------------------- /materials/chapter4/fun.c: -------------------------------------------------------------------------------- 1 | extern void abort(void); 2 | int fun(double a1, double a2, double a3, double a4, double a5, double a6, 3 | double a7, double a8, double a9, int a10, double a11, int a12) 4 | { 5 | if (a9 != a11) abort(); 6 | return 0; 7 | } 8 | 9 | -------------------------------------------------------------------------------- /materials/chapter7/space_allocation.csv: -------------------------------------------------------------------------------- 1 | 设备号,名称,BAR号,大小,起始地址,结束地址 2 | 1,USB控制器,0,4KB,0x48015000,0x48015FFF 3 | 2,显示控制器,0,128MB,0x40000000,0x47FFFFFF 4 | 2,显示控制器,1,64KB,0x48000000,0x4800FFFF 5 | 3,网络控制器,0,4KB,0x48014000,0x48014FFF 6 | 3,网络控制器,1,16KB,0x48010000,0x48013FFF 7 | -------------------------------------------------------------------------------- /materials/chapter2/control_inst.csv: -------------------------------------------------------------------------------- 1 | 指令,指令功能 2 | 
JIRL,相对寄存器偏移跳转并链接 3 | B,无条件相对转移 4 | BL,无条件相对转移并链接 5 | BEQ,等于时相对转移 6 | BNE,不等时相对转移 7 | BLT,有符号比较小于时相对转移 8 | BGE,有符号比较大于等于时相对转移 9 | BLTU,无符号比较小于时相对转移 10 | BGEU,无符号比较大于等于时相对转移 11 | BEQZ,等于0相对转移 12 | BNEZ,不等于0时相对转移 13 | -------------------------------------------------------------------------------- /materials/chapter2/int_type.csv: -------------------------------------------------------------------------------- 1 | C语言名称,LA32名称/数据长度,LA64名称/数据长度,X86名称/数据长度,X86-64名称/数据长度 2 | char,Byte/1,Byte/1,Byte/1,Byte/1 3 | short,Halfword/2,Halfword/2,Word/2,Word/2 4 | int,Word/4,Word/4,Dword/4,Dword/4 5 | long,Word/4,Dword/8,Dword/4,Qword/8 6 | long long,Dword/8,Dword/8,Qword/8,Qword/8 7 | -------------------------------------------------------------------------------- /materials/chapter6/ht_packet_format.csv: -------------------------------------------------------------------------------- 1 | 字节\数据位,7,6,5,4,3,2,1,0 2 | 0,命令相关内容,命令相关内容,命令,命令,命令,命令,命令,命令 3 | 1,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容 4 | 2,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容 5 | 3,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容,命令相关内容 6 | 7 | -------------------------------------------------------------------------------- /materials/chapter10/sections.c: -------------------------------------------------------------------------------- 1 | #pragma omp sections [private(list) |firstprivate(list)| \ 2 | lastprivate(list)|reduction(reduction-identifier:list)|nowait] newline 3 | { 4 | [#pragma omp section newline] 5 | Structured_block 6 | [#pragma omp section newline 7 | Structured_block] 8 | } -------------------------------------------------------------------------------- /materials/chapter2/regnum.csv: -------------------------------------------------------------------------------- 1 | 指令集,整数通用寄存器数 2 | Itanium,128 3 | VAX,16 4 | ARMv8,31 5 | PowerPC,32 6 | Alpha,32(包括“zero”) 7 | SPARC,32(包括“zero”) 8 | MIPS,在mips16模式下为8,在32/64位模式下为32(包括“zero”) 9 | ARMv7,在16位Thumb 模式下为7,在32位模式下为14 10 | 
X86,"16/32位时为8, 64位时为16" 11 | LoongArch,32(包括“zero”) 12 | -------------------------------------------------------------------------------- /materials/chapter2/switch_case.c: -------------------------------------------------------------------------------- 1 | int st(int a, int b, int c) 2 | { 3 | switch (a) { 4 | case 15: 5 | c = b & 0xf; 6 | case 10: 7 | return c + 50; 8 | case 12: 9 | case 17: 10 | return b + 50; 11 | case 14: 12 | return b; 13 | default: 14 | return a; 15 | } 16 | } 17 | 18 | -------------------------------------------------------------------------------- /materials/chapter4/varg_passing.csv: -------------------------------------------------------------------------------- 1 | 参数序号,传递方式,低位 高位 2 | 0,$fa0,(扩展为double)1 3 | 1,$a0,(扩展为double)2 4 | 2,$a1, 第1和2字节为3和4,其余为填充 5 | 3,$a2,(long double低64位)5 6 | 4,$a3,(long double高64位)5 7 | 5,$a4,(扩展为double)6 8 | 6,$a5,(扩展到64位)7 9 | 7,$a6,8 10 | 8,$a7,(扩展为double)9 11 | 9,内存$sp + 0,10 12 | -------------------------------------------------------------------------------- /materials/chapter6/ht_signals.csv: -------------------------------------------------------------------------------- 1 | 引脚名称,方向,描述 2 | TX_CLKp/TX_CLKn,输出,发送端时钟信号 3 | TX_CTLp/TX_CTLn,输出,发送端控制信号,用于区分命令包与数据包 4 | TX_CADp[n:0]/TX_CADn[n:0],输出,发送端命令地址数据复用信号,用于传输各种包 5 | RX_CLKp/RX_CLKn,输入,接收端时钟 6 | RX_CTLp/RX_CTLn,输入,接收端控制信号,用于区分命令包与数据包 7 | RX_CADp[n:0]/RX_CADn[n:0],输入,接收端命令地址数据复用信号,用于传输各种包 8 | -------------------------------------------------------------------------------- /materials/chapter4/dynamic.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | extern long 4 | nested(long a, long b, long c, long d, long e, long f, long g, long h, long i); 5 | 6 | long dynamic(void){ 7 | long *p = alloca(64); 8 | p[0] = 0x123; 9 | 10 | return nested((long)p, p[0], 3, 4, 5, 6, 7, 8, 9); 11 | } 12 | -------------------------------------------------------------------------------- /materials/chapter4/la_reg.csv: 
-------------------------------------------------------------------------------- 1 | 寄存器编号,助记符,使用约定 2 | 0,zero,总是为0 3 | 1,ra,子程序返回地址 4 | 2,tp,Thread Pointer,指向线程私有存储区 5 | 3,sp,栈指针 6 | 4~11,a0~a7,子程序的前八个参数 7 | 4~5,v0~v1,v0/v1是a0/a1的别名,用于表示返回值 8 | 12~20,t0~t8,不需保存的暂存器 9 | 21,Reserved,暂时保留不用 10 | 22,fp,Frame Pointer,栈帧指针 11 | 23-31,s0~s8,寄存器变量,子程序使用需要保存和恢复 12 | -------------------------------------------------------------------------------- /materials/chapter4/syscall.csv: -------------------------------------------------------------------------------- 1 | 类型,系统调用,调用号,作用 2 | 进程控制,clone,220,克隆一个进程 3 | 进程控制,execv,221,执行一个程序 4 | 文件读写,read,63,读文件 5 | 文件读写,write,64,写文件 6 | 文件系统,mkdir,34,创建目录 7 | 文件系统,mount,40,挂载文件系统 8 | 系统控制,gettimeofday,169,获取系统时间 9 | 系统控制,reboot,142,重新启动 10 | 内存管理,mmap,222,映射虚拟内存页 11 | 信号量,semctl,191,信号量控制 12 | -------------------------------------------------------------------------------- /materials/chapter4/simple.S: -------------------------------------------------------------------------------- 1 | simple: 2 | addi.d $sp,$sp,-16 3 | st.d $fp,$sp,8 4 | addi.d $fp,$sp,16 5 | ld.d $fp,$sp,8 6 | bstrpick.w $a0,$a0,7,0 7 | add.w $a0,$a0,$a1 8 | addi.d $sp,$sp,16 9 | jr $ra 10 | -------------------------------------------------------------------------------- /materials/chapter7/reg_multiplex.csv: -------------------------------------------------------------------------------- 1 | 偏移,名称(初始化设置下,0x3[7] = 1),名称(工作模式下,0x3[7] = 0) 2 | 0x0,分频锁存器低位,数据寄存器 3 | 0x1,分频锁存器高位,中断使能寄存器 4 | 0x2,读的时候为中断标识寄存器,写的时候为FIFO控制寄存器,读的时候为中断标识寄存器,写的时候为FIFO控制寄存器 5 | 0x3,"线路控制寄存器。其中比特7为分频控制访问使能。该位为1时可以访问表中“初始化设置”寄存器,为0时访问表中“工作模式”寄存器","线路控制寄存器。其中比特7为分频控制访问使能。该位为1时可以访问表中“初始化设置”寄存器,为0时访问表中“工作模式”寄存器" 6 | -------------------------------------------------------------------------------- /multi_column.template: -------------------------------------------------------------------------------- 1 | 2 | :::: {.cols data-latex=""} 3 | 4 | ::: {.col width=48% data-latex="{0.48\textwidth}"} 5 | ``` 6 
| ``` 7 | ::: 8 | 9 | ::: {.col width=4% data-latex="{0.04\textwidth}"} 10 | ``` 11 | ``` 12 | ::: 13 | 14 | ::: {.col width=48% data-latex="{0.48\textwidth}"} 15 | ``` 16 | ``` 17 | ::: 18 | :::: 19 | -------------------------------------------------------------------------------- /materials/chapter10/MPI.c: -------------------------------------------------------------------------------- 1 | /* Minimal MPI example: every process prints its own rank and the total process count. */ 2 | #include <stdio.h> 3 | #include "mpi.h" 4 | int main(int argc,char *argv[]) 5 | { int myid,count; 6 | MPI_Init(&argc,&argv); /* start the MPI computation */ 7 | MPI_Comm_size(MPI_COMM_WORLD,&count); /* get the total number of processes */ 8 | MPI_Comm_rank(MPI_COMM_WORLD, &myid);/* get the rank of this process */ 9 | printf("I am %d of %d\n", myid,count); /* print a message */ 10 | MPI_Finalize();/* end the MPI computation */ 11 | } -------------------------------------------------------------------------------- /materials/chapter8/3-8decoder_true_table.csv: -------------------------------------------------------------------------------- 1 | 输入1,输入2,输入3,输出1,输出2,输出3,输出4,输出5,输出6,输出7,输出8 2 | C,B,A,Y~0~,Y~1~,Y~2~,Y~3~,Y~4~,Y~5~,Y~6~,Y~7~ 3 | 0,0,0,1,0,0,0,0,0,0,0 4 | 0,0,1,0,1,0,0,0,0,0,0 5 | 0,1,0,0,0,1,0,0,0,0,0 6 | 0,1,1,0,0,0,1,0,0,0,0 7 | 1,0,0,0,0,0,0,1,0,0,0 8 | 1,0,1,0,0,0,0,0,1,0,0 9 | 1,1,0,0,0,0,0,0,0,1,0 10 | 1,1,1,0,0,0,0,0,0,0,1 -------------------------------------------------------------------------------- /materials/chapter12/perf-eval-class.csv: -------------------------------------------------------------------------------- 1 | a,b,c 2 | 性能建模,分析建模,概率模型 3 | 性能建模,分析建模,队列模型 4 | 性能建模,分析建模,马尔可夫模型 5 | 性能建模,分析建模,Petri网模型 6 | 性能建模,模拟建模,踪迹驱动模拟 7 | 性能建模,模拟建模,执行驱动模拟 8 | 性能建模,模拟建模,全系统模拟 9 | 性能建模,模拟建模,事件驱动模拟 10 | 性能建模,模拟建模,统计方法模拟 11 | 性能测量,片上硬件监测器(例如性能计数器),片上硬件监测器(例如性能计数器) 12 | 性能测量,片外硬件监测器,片外硬件监测器 13 | 性能测量,软件监测器,软件监测器 14 | 性能测量,微码插桩,微码插桩 15 | -------------------------------------------------------------------------------- /materials/chapter10/parallel_for.c: -------------------------------------------------------------------------------- 1 | #pragma omp parallel for
[if(scalar_expression)|num_threads(integer-expression)|default(shared|none)| \ 2 | private(list)|firstprivate(list)|lastprivate(list)|shared(list)|copyin(list)|reduction(reduction-identifier:list)| \ 3 | proc_bind(master|close|spread)|schedule(kind[,chunk_size])|collapse(n)|ordered] newline 4 | For_loop{ 5 | ... 6 | } -------------------------------------------------------------------------------- /materials/chapter12/UnixBench.csv: -------------------------------------------------------------------------------- 1 | 测试项目,项目描述 2 | Dhrystone,测试和比较定点计算性能 3 | Whetstone,测试和比较浮点计算性能 4 | Execl系统调用,测量每秒能执行的execl系统调用的次数 5 | 文件拷贝,测量数据从一个文件拷贝到另一个文件的速率 6 | 管道吞吐率,测量一个进程每秒能执行的把512字节写入管道再读回来的次数 7 | 基于管道的上下文切换,测量两个进程基于管道交换一个不断增长的整数的速度 8 | 进程创建,测量一个进程创建和回收一个立刻退出的子进程的速度 9 | Shell脚本,测量进程每分钟能执行的一些文件操作脚本的次数 10 | 系统调用开销,测量进入和退出操作系统内核的开销 11 | 图形测试,粗略测量系统2D和3D图形操作的性能 12 | -------------------------------------------------------------------------------- /materials/chapter4/fun_la.S: -------------------------------------------------------------------------------- 1 | fun: 2 | movgr2fr.d $f0,$a0 # 注意这两行, $f0是参数a9,从$a0获得 3 | movgr2fr.d $f1,$a2 # $f1从$a2获得,即参数a11 4 | fcmp.ceq.d $fcc0,$f1,$f0 # 比较a9和a11 5 | bceqz $fcc0,.L8 6 | move $a0,zero 7 | jr $ra 8 | .L8: 9 | addi.d $sp,$sp,-16 10 | st.d $ra,$sp,8 11 | bl %plt(abort) 12 | ld.d $ra,$sp,8 13 | -------------------------------------------------------------------------------- /materials/chapter10/pi_C.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | int main(){ 4 | int i; 5 | int num_steps=1000000; 6 | double x,pi,step,sum=0.0; 7 | step = 1.0/(double) num_steps; 8 | 9 | for(i=0;i 2 | #include 3 | int main(){ 4 | int i; 5 | int num_steps=1000000; 6 | double x,pi,step,sum=0.0; 7 | step = 1.0/(double) num_steps; 8 | #pragma omp parallel for private(i, x), reduction(+:sum) 9 | for(i=0;i p:last-child { 19 | text-align: right; 20 | } 21 | blockquote > p:first-child { 22 | text-align: inherit;
23 | } 24 | 25 | .cols {display: flex; } 26 | .width48 {width: 48%; } 27 | .width4 {width: 4%; } 28 | -------------------------------------------------------------------------------- /materials/chapter2/loop.c: -------------------------------------------------------------------------------- 1 | int test_for(int a) { 2 | int sum = 0; 3 | int i = 0; 4 | 5 | for (i = 0; i < a; i++) { 6 | sum += i; 7 | } 8 | 9 | return sum; 10 | } 11 | 12 | 13 | int test_while(int a) { 14 | int sum = 0; 15 | int i = 0; 16 | 17 | while (i < a) { 18 | sum += i; 19 | i++; 20 | } 21 | 22 | return sum; 23 | } 24 | 25 | 26 | int test_dowhile(int a) { 27 | int sum = 0; 28 | int i = 0; 29 | 30 | do { 31 | sum += i; 32 | i++; 33 | } while (i < a); 34 | 35 | return sum; 36 | } 37 | -------------------------------------------------------------------------------- /materials/chapter12/performance-formula.csv: -------------------------------------------------------------------------------- 1 | 硬件或软件,影响什么,如何影响 2 | 算法,程序的执行指令数,算法决定源程序执行指令的数目,好的算法可以大幅度减少运算的次数 3 | 编程语言,程序的执行指令数,编程语言可能对执行指令数产生巨大的影响,比如解释执行、即时编译或者原生编译的三类语言完成同样的功能所需要的指令数可能有数量级的差异 4 | 编译器和库,程序的执行指令数、CPI,编译器和库决定了源程序到计算机指令的翻译过程,编译程序的效率既影响到程序的执行指令数又影响到CPI,如Intel的ICC编译器编出来的程序,效率可比GCC高30%,其能充分利用向量化指令和针对处理器结构的优化 5 | 指令系统结构,程序的执行指令数、CPI和时钟频率,指令系统结构影响到CPU性能的3个方面,因为它影响到完成某个功能所需的指令数、每条指令的周期数,以及处理器的时钟频率 6 | 微体系结构,CPI和时钟频率,微体系结构的改进可以降低CPI,也可以细分流水线来提高频率 7 | 物理设计,时钟频率,物理设计和电路的进步可以降低每个时钟周期的FO4,从而提高时钟频率 8 | 工艺,时钟频率,工艺的进步使得晶体管变快从而提高时钟频率 9 | -------------------------------------------------------------------------------- /materials/chapter12/perf-tools.csv: -------------------------------------------------------------------------------- 1 | 工具,平台,链接 2 | Intel Vtune,Intel X86,http://software.intel.com/intel-vtune-amplifier-xe 3 | Linux perf,X86/MIPS等,http://perf.wiki.kernel.org 4 | oprofile,X86/MIPS等,http://oprofile.sourceforge.net 5 | DCPI,Alpha,http://www.hp.com/openvms/products/dcpi 6 | Perf-mon,UltraSPARC,http://www.sics.se/~mch/perf-monitor/index.html 7 | 
AMD CodeAnalyst,AMD X86,http://developer.amd.com/tools-and-sdks/compute__trashed/amd-codeanalyst-performance-analyzer-for-linux/ 8 | PAPI,X86,http://icl.cs.utk.edu/papi/software/index.html 9 | -------------------------------------------------------------------------------- /materials/chapter12/PARSEC.csv: -------------------------------------------------------------------------------- 1 | 程序,应用领域,并行模式,并行粒度,数据共享,数据交换,Pthreads,OpenMP,TBB 2 | blackscholes,金融分析,数据并行,粗粒度,低,低,X,X,X 3 | bodytrack,计算视觉,数据并行,中等粒度,高,中等,X,X,X 4 | canneal,工程类,非结构化,细粒度,高,高,X,, 5 | dedup,企业存储,流水线,中等粒度,高,高,X,, 6 | facesim,动画,数据并行,粗粒度,低,中等,X,, 7 | ferret,相似性查找,流水线,中等粒度,高,高,X,, 8 | fluidanimate,动画,数据并行,细粒度,低,中等,X,,X 9 | freqmine,数据挖掘,数据并行,中等粒度,高,中等,,X, 10 | raytrace,渲染,数据并行,中等粒度,高,低,X,, 11 | streamcluster,数据挖掘,数据并行,中等粒度,低,中等,X,,X 12 | swaptions,金融分析,数据并行,粗粒度,低,低,X,,X 13 | vips,媒体处理,数据并行,粗粒度,低,中等,X,, 14 | x264,媒体处理,流水线,粗粒度,高,高,X,, 15 | -------------------------------------------------------------------------------- /materials/chapter10/maritx_OpenMP.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #define n 1000 4 | double A[n][n],B[n][n],C[n][n]; 5 | 6 | int main() 7 | { 8 | int i,j,k; 9 | //初始化矩阵A和矩阵B 10 | for(i=0;i14)? 5 | addi.w $t0,$r0,10 6 | beq $a0,$t0,.L4 //(a==10)? 7 | addi.w $t0,$r0,12 8 | beq $a0,$t0,.L5 //(a==12)? 9 | jr $ra //return a 10 | .L3: 11 | addi.w $t0,$r0,15 12 | beq $a0,$t0,.L6 //(a==15)? 13 | addi.w $t0,$r0,17 14 | beq $a0,$t0,.L5 //(a==17)? 
15 | jr $ra //return a 16 | .L6: 17 | andi $a2,$a1,0xf //b & 0xf 18 | .L4: 19 | addi.w $a0,$a2,50 //c + 50 20 | jr $ra 21 | .L5: 22 | addi.w $a0,$a1,50 //b + 50 23 | jr $ra 24 | .L7: 25 | or $a0,$a1,$r0 //return b 26 | jr $ra 27 | -------------------------------------------------------------------------------- /materials/chapter4/fun_mips.S: -------------------------------------------------------------------------------- 1 | fun: 2 | daddiu $sp,$sp,-16 3 | ldc1 $f1,16($sp) # a9 is loaded from $sp + 16 4 | ldc1 $f0,32($sp) # a11 is loaded from $sp + 32 5 | sd $28,0($sp) 6 | lui $28,%hi(%neg(%gp_rel(fun))) 7 | c.eq.d $fcc0,$f1,$f0 8 | daddiu $28,$28,%lo(%neg(%gp_rel(fun))) 9 | sd $31,8($sp) 10 | bc1f $fcc0,.L5 11 | daddu $28,$28,$25 12 | ld $31,8($sp) 13 | ld $28,0($sp) 14 | move $2,$0 15 | jr $31 16 | daddiu $sp,$sp,16 17 | 18 | .L5: 19 | ld $25,%call16(abort)($28) 20 | .reloc 1f,R_MIPS_JALR,abort 21 | 1: jalr $25 22 | nop 23 | -------------------------------------------------------------------------------- /materials/chapter2/loop.S: -------------------------------------------------------------------------------- 1 | test_for: 2 | or $t0,$r0,$r0 3 | or $t1,$r0,$r0 4 | .L2: 5 | blt $t0,$a0,.L3 6 | or $a0,$t1,$r0 7 | jr $ra 8 | .L3: 9 | add.w $t1,$t1,$t0 10 | addi.w $t0,$t0,1 11 | b .L2 12 | 13 | 14 | test_while: 15 | or $t0,$r0,$r0 16 | or $t1,$r0,$r0 17 | .L2: 18 | blt $t0,$a0,.L3 19 | or $a0,$t1,$r0 20 | jr $ra 21 | .L3: 22 | add.w $t1,$t1,$t0 23 | addi.w $t0,$t0,1 24 | b .L2 25 | 26 | test_dowhile: 27 | // a : $a0 28 | // sum : $t0 29 | // i : $t1 30 | or $t0,$r0,$r0 31 | or $t1,$r0,$r0 32 | .L1: 33 | add.w $t0,$t0,$t1 34 | addi.w $t1,$t1,1 35 | blt $t1,$a0,.L1 36 | or $a0,$t0,$r0 //return sum 37 | jr $ra 38 | -------------------------------------------------------------------------------- /materials/chapter4/normal.S: -------------------------------------------------------------------------------- 1 | normal: 2 | addi.d $sp,$sp,-32 3 | addi.w $t0,$zero,9 # 0x9 4 | stptr.d $t0,$sp,0 5 | addi.w
$a7,$zero,8 # 0x8 6 | addi.w $a6,$zero,7 # 0x7 7 | addi.w $a5,$zero,6 # 0x6 8 | addi.w $a4,$zero,5 # 0x5 9 | addi.w $a3,$zero,4 # 0x4 10 | addi.w $a2,$zero,3 # 0x3 11 | addi.w $a1,$zero,2 # 0x2 12 | addi.w $a0,$zero,1 # 0x1 13 | st.d $ra,$sp,24 14 | bl %plt(nested) 15 | ld.d $ra,$sp,24 16 | addi.d $sp,$sp,32 17 | jr $ra 18 | -------------------------------------------------------------------------------- /materials/chapter6/ddr3_udimm.csv: -------------------------------------------------------------------------------- 1 | 引脚名称,描述,引脚名称,描述 2 | A0-A15,SDRAM地址线,SCL,EEPROM I2C总线时钟 3 | BA0-BA3,SDRAM bank地址,SDA,EEPROM I2C总线数据线 4 | RAS#,SDRAM行地址选通,SA0-SA2,EEPROM I2C从设备地址 5 | CAS#,SDRAM列地址选通,VDD,SDRAMCore电源 6 | WE#,SDRAM写使能,VDDQ,SDRAM IO输出电源 7 | S0#-S1#,SDRAM片选信号,VrefDQ,SDRAM IO参考电源 8 | CKE0-CKE1,SDRAM时钟使能信号,VrefCA,SDRAM命令地址参考电源 9 | ODT0-ODT1,SDRAM终端匹配电阻控制信号,Vss,电源地信号 10 | DQ0-DQ63,DIMM内存数据线,VDDSPD,EEPROM电源 11 | CB0-CB7,DIMM ECC数据线,NC,空闲引脚 12 | DQS0-DQS8,SDRAM数据选通线,TEST,测试引脚 13 | ,(差分对的正沿),, 14 | DQS0-DQS8,SDRAM数据选通线,RESET#,复位引脚 15 | ,(差分对的负沿),, 16 | DM0-DM8,SDRAM数据Mask线,EVENT#,温度传感器引脚(可选) 17 | CK0-CK8,SDRAM时钟信号线,VTT,SDRAM IO终端匹配电阻电源 18 | ,(差分对的正沿),, 19 | CK0-CK8,SDRAM时钟信号线,RSVD,保留 20 | ,(差分对的负沿),, 21 | -------------------------------------------------------------------------------- /materials/chapter1/flops_bandwidth.csv: -------------------------------------------------------------------------------- 1 | CPU,年代,主频,SIMD,GFLOPS,GB/s,含SIMD比例,无SIMD比例 2 | DEC Alpha 21264,1996,600MHz,-,1.2,2,0.6,0.6 3 | AMD K7 Athlon,1999,700MHz,-,1.4,1.6,0.88,0.88 4 | Intel Pentium III,1999,600MHz,-,0.6,0.8,0.75,0.75 5 | Intel Pentium IV,2001,1.5GHz,-,3,3.2,0.94,0.94 6 | Intel Core2 E6420 X2,2007,2.8GHz,128位,22.4,8.5,2.64,1.32 7 | AMD K10 Phenom II X4 955,2009,3.2GHz,128位,51.2,21.3,2.4,1.2 8 | Intel Nehalem X5560,2009,2.8GHz,128位,44.8,32,1.4,0.7 9 | IBM Power8,2014,5.0GHz,128位,480,230.4,2.08,1.04 10 | AMD Piledriver Fx8350,2014,4.0GHz,256位,128,29.9,4.29,1.07 11 | Intel Skylake E3-1230 
V5,2015,3.4GHz,256位,217.6,34.1,6.38,1.6 12 | 龙芯3A2000,2015,1.0GHz,-,16,16,1,1 13 | 龙芯3A5000,2020,2.5GHz,256位,160.0,51.2,3.13,0.78 14 | -------------------------------------------------------------------------------- /materials/chapter2/addressing.csv: -------------------------------------------------------------------------------- 1 | 寻址方式,格式,含义 2 | 寄存器寻址(Register),"ADD R1,R2",regs[R1]=regs[R1]+regs[R2] 3 | 立即数寻址(Immediate),"ADD R1,#2",regs[R1]=regs[R1]+2 4 | 偏移量寻址(Displacement),"ADD R1,100(R2)",regs[R1]=regs[R1]+mem[100+regs[R2]] 5 | 寄存器间接寻址(Reg.Indirect),"ADD R1,(R2)",regs[R1]=regs[R1]+mem[regs[R2]] 6 | 变址寻址(Indexed),"ADD R1,(R2+R3)",regs[R1]=regs[R1]+mem[regs[R2]+regs[R3]] 7 | 绝对寻址(Absolute),"ADD R1,(100)",regs[R1]=regs[R1]+mem[100] 8 | 存储器间接寻址(Mem.Indirect),"ADD R1,@(R2)",regs[R1]=regs[R1]+mem[mem[regs[R2]]] 9 | 自增量寻址(Autoincrement),"ADD R1,(R2)+","regs[R1]=regs[R1]+mem[regs[R2]],regs[R2]=regs[R2]+d" 10 | 自减量寻址(Autodecrement),"ADD R1,-(R2)","regs[R2]=regs[R2]-d,regs[R1]=regs[R1]+mem[regs[R2]]" 11 | 比例变址寻址(Scaled),"ADD R1,100(R2)(R3)",regs[R1]=regs[R1]+mem[100+regs[R2]+regs[R3]*d] 12 | -------------------------------------------------------------------------------- /materials/chapter6/axi.csv: -------------------------------------------------------------------------------- 1 | 引脚名称,方向,描述 2 | AWID[n:0],输出,写请求标识号 3 | AWADDR[m:0],输出,写请求地址 4 | AWSIZE[2:0],输出,写请求数据宽度 5 | AWLEN[3:0],输出,写请求数据长度 6 | AWBURST[1:0],输出,写请求类型 7 | AWVALID,输出,写请求有效信号 8 | AWREADY,输入,写请求接收准备好信号 9 | WID[n:0],输出,写数据标识号,与写请求标识号对应 10 | WDATA[j:0],输出,写数据 11 | WSTRB[k:0],输出,写数据屏蔽信号,1位对应8个数据位 12 | WVALID,输出,写数据有效信号 13 | WREADY,输入,写数据接收准备好信号 14 | BID[n:0],输入,写响应标识号,与写请求标识号对应 15 | BRESP[1:0],输入,写响应状态 16 | BVALID,输入,写响应有效信号 17 | BREADY,输出,写响应接收准备好信号 18 | ARID[n:0],输出,读请求标识号 19 | ARADDR[m:0],输出,读请求地址 20 | ARSIZE[2:0],输出,读请求数据宽度 21 | ARLEN[3:0],输出,读请求数据长度 22 | ARBURST[1:0],输出,读请求类型 23 | ARVALID,输出,读请求有效信号 24 | ARREADY,输入,读请求接收准备好信号 25 | RID[n:0],输入,读数据标识号,与读请求标识号对应 26 | RDATA[j:0],输入,读数据 27 |
RRESP[1:0],输入,读响应状态 28 | RVALID,输入,读数据有效信号 29 | RREADY,输出,读数据接收准备好信号 30 | -------------------------------------------------------------------------------- /materials/chapter12/cpu-params.csv: -------------------------------------------------------------------------------- 1 | 厂商,Loongson,AMD,Intel 2 | 处理器型号,3A5000,r3 1200,i3 9100f 3 | 上市时间,2021,2017,2019 4 | 工艺,12nm,14nm,14nm 5 | 指令集,LoongArch,X86-64,X86-64 6 | 微结构型号,LA464,Zen1,SkyLake 7 | 频率,2.5GHz,2.5GHz,2.5GHz 8 | 内存类型和频率,DDR4 3200MHz,DDR4 3200MHz,DDR4 2400MHz 9 | Cache层次,"64KB一级ICache 10 | 64KB一级DCache 11 | 256KB二级Cache 12 | 16MB三级Cache","64KB一级ICache 13 | 32KB一级DCache 14 | 512KB二级Cache 15 | 8MB三级Cache","32KB一级ICache 16 | 32KB一级DCache 17 | 256KB二级Cache 18 | 6MB三级Cache" 19 | 核心队列和重命名寄存器数量,128项ROB,64项load队列,48项store队列,32项分支队列,32项定点、32项浮点和32项访存保留站,128项定点和128项浮点重命名寄存器。,192项ROB,72项load队列,44项store队列,84项定点和96项浮点保留站,168项定点和160项浮点重命名寄存器。,224项ROB,72项load队列,56项store队列,97项统一保留站,180项定点和168项浮点重命名寄存器。 20 | 功能部件数,4个定点,2个访存部件,2个256位的浮点乘加,4个定点部件,2个访存部件,4个128位浮点部件(其中2个FMA/FMUL,2个FADD),4个定点、3个访存、3个256位浮点乘加 21 | -------------------------------------------------------------------------------- /materials/chapter2/switch_case.S: -------------------------------------------------------------------------------- 1 | st: 2 | addi.w $t0,$a0, -10 //a-10 3 | sltui $t1,$t0, 8 4 | beqz $t1, default //if (a-10)>=8 5 | //goto default 6 | la $t2, jr_table 7 | alsl.d $t1, $t0, $t2, 3 8 | //(a-10)*8+jr_table 9 | ld.d $t0, $t1, 0 10 | jr $t0 11 | default: 12 | or $a1,$a0,$r0 13 | case_14: 14 | or $a0,$a1,$r0 15 | jr $ra //return b for case_14, 16 | //return a for default 17 | case_15: 18 | andi $a2,$a1,0xf //b & 0xf 19 | case_10: 20 | addi.w $a1,$a2,50 //c+50 21 | b case_14 22 | case_12_17: 23 | addi.w $a1,$a1,50 //b+50 24 | b case_14 25 | # jump table 26 | .section .rodata 27 | .align 3 28 | jr_table: 29 | .dword case_10 30 | .dword default 31 | .dword case_12_17 32 | .dword default 33 | .dword case_14 34 | .dword case_15 35 | .dword default 36 |
.dword case_12_17 37 | -------------------------------------------------------------------------------- /.github/workflows/deploy_bookdown.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | pull_request: 6 | branches: 7 | - main 8 | 9 | name: renderbook 10 | 11 | permissions: 12 | contents: write 13 | pages: write 14 | gh-pages: write 15 | 16 | jobs: 17 | bookdown: 18 | name: Render-Book 19 | runs-on: ubuntu-latest 20 | container: 21 | image: foxsen76/archbase-builder:latest 22 | steps: 23 | - name: bookdown builder 24 | run: cd /opt/archbase && git pull && make -j 8 && cp -a ./_book /github/home/ 25 | 26 | - name: Deploy to GitHub Pages 27 | uses: Cecilapp/GitHub-Pages-deploy@v3 28 | env: 29 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 30 | with: 31 | email: 2503799872@qq.com 32 | build_dir: /github/home/_book/ 33 | -------------------------------------------------------------------------------- /materials/chapter3/exception.csv: -------------------------------------------------------------------------------- 1 | 异常代号,Ecode,Esubcode,异常说明,所属异常类别 2 | PIL,0x1,,load操作页无效异常,地址转换异常 3 | PIS,0x2,,store操作页无效异常,地址转换异常 4 | PIF,0x3,,取指操作页无效异常,地址转换异常 5 | PME,0x4,,页修改异常,地址转换异常 6 | PNR,0x5,,页不可读异常,地址转换异常 7 | PNX,0x6,,页不可执行异常,地址转换异常 8 | PPI,0x7,,页权限等级不合规异常,地址转换异常 9 | ADEF,0x8,0x0,取指地址错异常,指令执行中的错误 10 | ADEM,0x8,0x1,访存指令地址错异常,指令执行中的错误 11 | ALE,0x9,,地址非对齐异常,指令执行中的错误 12 | BCE,0xA,,边界约束检查错异常,指令执行中的错误 13 | SYS,0xB,,系统调用异常,系统调用和陷入 14 | BRK,0xC,,断点异常,系统调用和陷入 15 | INE,0xD,,指令不存在异常,指令执行中的错误 16 | IPE,0xE,,指令权限等级错异常,指令执行中的错误 17 | FPD,0xF,,浮点指令未使能异常,系统调用和陷入 18 | SXD,0x10,,128位向量扩展指令未使能异常,系统调用和陷入 19 | ASXD,0x11,,256位向量扩展指令未使能异常,系统调用和陷入 20 | FPE,0x12,0x0,基础浮点指令异常,需要软件修正的运算 21 | VFPE,0x12,0x1,向量浮点指令异常,需要软件修正的运算 22 | WPEF,0x13,0x0,取指监测点异常,系统调用和陷入 23 | WPEM,0x13,0x1,load/store操作监测点异常,系统调用和陷入 24 | INT,,,中断,外部事件 25 | TLBR,,,TLB重填异常,地址转换异常 26 | MERR,,,机器错误异常,数据完整性问题 27 | 
-------------------------------------------------------------------------------- /materials/chapter2/alu_inst.csv: -------------------------------------------------------------------------------- 1 | 指令,指令功能 2 | ADD.W,字加 3 | ADDI.W,字加立即数 4 | SUB.W,字减 5 | ADD.D,双字加 6 | ADDI.D,双字加立即数 7 | SUB.D,双字减 8 | SLT,有符号数比较小于置1 9 | SLTI,有符号数立即数比较小于置1 10 | SLTU,无符号数比较小于置1 11 | SLTUI,无符号数立即数比较小于置1 12 | AND,与 13 | OR,或 14 | XOR,异或 15 | NOR,或非 16 | ANDI,与立即数 17 | ORI,或立即数 18 | XORI,异或立即数 19 | LU12I.W,加载20位立即数到高位 20 | SLL.W,字逻辑左移变量位 21 | SRL.W,字逻辑右移变量位 22 | SRA.W,字算术右移变量位 23 | SLLI.W,字逻辑左移常量位 24 | SRLI.W,字逻辑右移常量位 25 | SRAI.W,字算术右移常量位 26 | SLL.D,双字逻辑左移变量位 27 | SRL.D,双字逻辑右移变量位 28 | SRA.D,双字算术右移变量位 29 | SLLI.D,双字逻辑左移常量位 30 | SRLI.D,双字逻辑右移常量位 31 | SRAI.D,双字算术右移常量位 32 | MUL.W,字乘取低半部分 33 | MULH.W,有符号字乘取高半部分 34 | MULH.WU,无符号字乘取高半部分 35 | MUL.D,双字乘取低半部分 36 | MULH.D,有符号双字乘取高半部分 37 | MULH.DU,无符号双字乘取高半部分 38 | DIV.W,有符号字除取商 39 | DIV.WU,无符号字除取商 40 | MOD.W,有符号字除取余 41 | MOD.WU,无符号字除取余 42 | DIV.D,有符号双字除取商 43 | DIV.DU,无符号双字除取商 44 | MOD.D,有符号双字除取余 45 | MOD.DU,无符号双字除取余 46 | -------------------------------------------------------------------------------- /materials/chapter4/Makefile: -------------------------------------------------------------------------------- 1 | # 注意:为了保持生成的文件宽度相同,每个源文件最长的行应该都保持为80,不够的可以在某一行末尾添空格 2 | # 理论上各种Mono的字体应该都可以满足,但实际测试发现很多字体并不能保证生成的字符宽度一致。 3 | 4 | %.c.png:%.c 5 | pygmentize -f png -O encoding=utf8,font_name='SimSun',line_numbers=False $< -o $@ 6 | # pygmentize -f png -O encoding=utf8,font_name='Noto Sans Mono CJK SC',line_numbers=False $< -o $@ 7 | 8 | %.S.png:%.S 9 | pygmentize -f png -O encoding=utf8,font_name='SimSun',line_numbers=False $< -o $@ 10 | # pygmentize -f png -O encoding=utf8,font_name='Noto Sans Mono CJK SC',line_numbers=False $< -o $@ 11 | 12 | %.txt.png:%.txt 13 | pygmentize -f png -O encoding=utf8,font_name='SimSun',line_numbers=False $< -o $@ 14 | 15 | all: fun.c.png fun_mips.S.png fun_la.S.png varg.c.png simple.c.png simple.S.png simple_nofp.S.png 
normal.c.png normal.S.png dynamic.c.png dynamic.S.png keyboard_interrupt.txt.png syscall_write.S.png 16 | @echo 'done' 17 | 18 | clean: 19 | rm -f *.png 20 | -------------------------------------------------------------------------------- /03-foreword-3rd.Rmd: -------------------------------------------------------------------------------- 1 | # 第三版序 {-} 2 | \markboth{第三版序}{第三版序} 3 | 4 | 在中国科学院大学讲授“计算机体系结构基础”课程五年以来,发现了《计算机体系结构基础》教材不少值得改进的地方。除了修订第2版的一些错误,这次第3版的主要改进内容包括以下三个方面。 5 | 6 | 一是加强计算机软硬件协同方面的内容。如第4章对应用程序二进制接口(Application Binary Interface,简称ABI)的描述更加清楚,增加了操作系统中关于用户程序地址空间分布的内容,并介绍了函数调用、例外处理、系统调用、线程切换、进程切换和虚拟机切换等六种场景的现场保留和恢复过程,希望读者可以通过上述过程更深入地了解计算机系统软硬件的配合。又如第7章在介绍计算机系统启动过程时把串口作为一只“麻雀”进行解剖,希望读者可以借此了解CPU对IO设备的访问与对内存的访问的不同。这样的地方还有不少。 7 | 8 | 二是对部分内容进行调整以使之更完整和适用。如第3章的特权指令系统部分,从例外、中断、存储管理等方面更详细地分析了操作系统内核专用的特权指令系统的内容。第12章的性能分析部分,在详细介绍Perf性能分析工具的基础上去掉了对Oprofile性能分析工具的介绍,适当缩减了性能测试与分析的具体案例内容,突出基准程序性能测试、Perf微结构数据统计和微测试程序(Microbench)等不同角度的方法与工具在性能分析工作中的应用。 9 | 10 | 三是在指令系统举例时使用LoongArch指令系统而不是MIPS指令系统。LoongArch是由龙芯团队在2020年推出的新型RISC指令系统。该指令系统摒弃了传统指令系统中部分不适应当前软硬件设计技术发展趋势的陈旧内容,吸纳了近年来指令系统设计领域诸多先进的技术发展成果,有助于硬件实现高性能低功耗的设计,也有利于软件的编译优化以及操作系统、虚拟机的开发。 11 | 12 | 一门课程的成熟往往需要十年时间。上述根据五年的教学经验进行的修改肯定还不够,需要在未来的教学工作中继续进行改进。 13 | 14 | 胡伟武 15 | 16 | 2021年6月29日 17 | 18 | \newpage 19 | -------------------------------------------------------------------------------- /materials/chapter1/spec_cpu2000.csv: -------------------------------------------------------------------------------- 1 | SPEC程序,运行时间/秒,分值,运行时间/秒1,分值1 2 | 164.gzip,503,279,323,433 3 | 175.vpr,389,360,222,632 4 | 176.gcc,206,533,110,1003 5 | 181.mcf,480,375,195,925 6 | 186.crafty,166,604,122,822 7 | 197.parser,707,254,266,676 8 | 252.eon,159,815,141,924 9 | 253.perlbmk,418,431,279,644 10 | 254.gap,338,325,155,711 11 | 255.vortex,291,652,125,1520 12 | 256.bzip2,383,391,285,527 13 | 300.twolf,421,712,364,824 14 | SPEC_INT2000,,447,,764 15 | 168.wupwise,338,473,123,1296 16 | 171.swim,1299,239,324,957 17 | 172.mgrid,1045,172,169,1062 
18 | 173.applu,900,233,197,1067 19 | 177.mesa,244,574,156,896 20 | 178.galgel,507,572,143,2022 21 | 179.art,173,1504,97,2686 22 | 183.equake,457,285,96,1353 23 | 187.facerec,288,659,146,1306 24 | 188.ammp,538,409,274,803 25 | 189.lucas,716,279,181,1104 26 | 191.fma3d,550,382,203,1034 27 | 200.sixtrack,553,199,276,399 28 | 301.apsi,1159,224,235,1108 29 | SPEC_FP2000,,367,,1120 30 | -------------------------------------------------------------------------------- /materials/chapter12/perf-event-3A5000.csv: -------------------------------------------------------------------------------- 1 | 事件号,事件名称,事件定义 2 | 00H,clkcnt,时钟周期数 3 | 01H,roq_cmtcnt,提交指令数 4 | 02H,brq_branch,brq返回分支指令数 5 | 03H,brq_err_branch,brq返回预测错误分支指令数 6 | 04H,dtlb_access_cnt,数据tlb访问次数 7 | 08H,dcache_access,一级数据Cache访问次数 8 | 09H,dcache_miss,一级数据Cache缺失次数 9 | 0AH,vcache_access,victim_cache访问次数 10 | 0BH,vcache_miss,victim_cache缺失次数 11 | 0CH,scres_total,三级Cache访问次数 12 | 0DH,scres_miss,三级Cache缺失次数 13 | 24H,roq_vecfp_cmtcnt,处理器提交向量浮点运算指令数 14 | 25H,roq_vecint_cmtcnt,处理器提交向量定点运算指令数 15 | 27H,roq_ex_cnt,处理器中例外次数 16 | 29H,brq_bhtbrq,返回条件跳转类分支指令数 17 | 2AH,brq_err_bht,brq返回条件跳转类错误预测分支指令数 18 | 2FH,ade_ualign_cnt,发生不对齐访问错误次数 19 | 32H,roq_load_cmtcnt,处理器提交load指令数 20 | 33H,roq_store_cmtcnt,处理器提交store指令数 21 | 34H,roq_scaint_cmtcnt,处理器提交标量定点运算指令数 22 | 40H,roq_cmt_4inst,提交阻塞周期数 23 | 41H,dec_deliver_stall,前端阻塞周期数 24 | 42H,be_stall,后端阻塞周期数 25 | 4CH,fxq_stall,定点发射队列阻塞周期数 26 | 4EH,mmq_stall,访存发射队列阻塞周期数 27 | 4FH,ftq_stall,浮点发射队列阻塞周期数 28 | -------------------------------------------------------------------------------- /materials/chapter12/perf-event-nehalem.csv: -------------------------------------------------------------------------------- 1 | 事件号,umask值,事件,描述 2 | 3CH,00H,UnHalted Core cycles,时钟周期或拍数 3 | C0H,00H,Instruction retired,提交的指令数 4 | 2EH,4FH,LLC reference,访问最后一级Cache的数目 5 | 2EH,41H,LLC misses,访问最后一级Cache失效的数目 6 | C4H,00H,Branch Instruction Retired,提交的分支指令的数目 7 | C5H,00H,Branch Misses Retired,提交的误预测的分支指令的数目 8 | 
0BH,01H,MEM_INST_RETIRED.LOADS,提交的load指令的数目 9 | 0BH,02H,MEM_INST_RETIRED.STORES,提交的store指令的数目 10 | 0EH,01H,UOPS_ISSUED.ANY,从重命名表发射到保留站的微码数目 11 | 0FH,02H,MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM,提交的load访存操作,命中芯片相邻核二级Cache,并在Modified状态 12 | 12H,01H,SIMD_INT_128.PACKED_MPY,128位的SIMD定点乘法操作的数目 13 | 24H,01H,L2_RQST.LD_MISS,二级load请求,二级Cache失效,二级load包括L1D失效和L1D预取 14 | 26H,FFH,L2_DATA_RQSTS.ANY,所有的二级数据请求 15 | 40H,0FH,L1D_CACHE_LD.MESI,所有的一级数据Cache读请求 16 | C4H,00H,BR_INST_RETIRED.ALL_BRANCHES,提交的分支指令 17 | D2H,0FH,RAT_STALLS.ANY,寄存器分配表引起的堵塞 18 | 2AH,01H,UNC_QMC_OCCUPANCY.CH0,内存控制器通道0读请求发生 19 | 60H,01H,UNC_DRAM_OPEN.CH0,DRAM通道0由于读或者写发出open命令,因为该页首先需要打开 20 | -------------------------------------------------------------------------------- /materials/chapter4/dynamic.S: -------------------------------------------------------------------------------- 1 | dynamic: 2 | addi.d $sp,$sp,-32 3 | st.d $fp,$sp,16 #保存fp 4 | st.d $ra,$sp,24 #保存ra 5 | addi.d $fp,$sp,32 # fp指向入口时的sp 6 | addi.d $sp,$sp,-64 # alloca 7 | addi.d $a0,$sp,16 # 从sp+16到sp+80为分配的alloca空间 8 | addi.w $t0,$zero,291 # 0x123 9 | stptr.d $t0,$a0,0 10 | addi.w $t0,$zero,9 # 0x9 11 | stptr.d $t0,$sp,0 # sp到sp+16为调子函数的参数区 12 | addi.w $a7,$zero,8 # 0x8 13 | addi.w $a6,$zero,7 # 0x7 14 | addi.w $a5,$zero,6 # 0x6 15 | addi.w $a4,$zero,5 # 0x5 16 | addi.w $a3,$zero,4 # 0x4 17 | addi.w $a2,$zero,3 # 0x3 18 | addi.w $a1,$zero,291 # 0x123 19 | bl %plt(nested) 20 | addi.d $sp,$fp,-32 21 | ld.d $ra,$sp,24 22 | ld.d $fp,$sp,16 23 | addi.d $sp,$sp,32 24 | jr $ra 25 | -------------------------------------------------------------------------------- /_output.yml: -------------------------------------------------------------------------------- 1 | bookdown::gitbook: 2 | css: css/style.css 3 | config: 4 | toc: 5 | collapse: none 6 | before: | 7 |
  • 计算机体系结构基础
  • 8 | after: | 9 |
  • 本书电子版由龙芯中科赞助提供
  • 10 | download: [pdf, docx] 11 | sharing: 12 | github: yes 13 | facebook: no 14 | pandoc_args: "--variable=lang:zh-CN" 15 | bookdown::pdf_book: 16 | includes: 17 | in_header: latex/preamble.tex 18 | before_body: latex/before_body.tex 19 | after_body: latex/after_body.tex 20 | keep_tex: yes 21 | dev: "cairo_pdf" 22 | latex_engine: xelatex 23 | # citation_package: natbib 24 | template: latex/template.tex 25 | pandoc_args: "--top-level-division=chapter" 26 | toc_depth: 3 27 | toc_unnumbered: no 28 | toc_appendix: yes 29 | quote_footer: ["\\begin{flushright}", "\\end{flushright}"] 30 | bookdown::epub_book: 31 | stylesheet: css/style.css 32 | bookdown::word_document2: 33 | reference_docx: ./word/template.docx 34 | toc: true 35 | -------------------------------------------------------------------------------- /05-online-version.Rmd: -------------------------------------------------------------------------------- 1 | # 关于本书的在线版本{-} 2 | 3 | 在第三版的改版过程中,作者们引入了一个创新尝试,试图将本书打造为一本活的教科书。具体来说,我们采用以文本为基础的rmarkdown格式编辑书本内容(相关的工具说明参见[bookdown](https://bookdown.org)),用git对其进行版本管理,并在互联网进行开源维护。在相应的网站上,还会提供出版社提供的与纸质版本一致的电子版本,以及相关的参考课件PPT和其他补充资源。我们认为这么做有几个好处: 4 | 5 | * 文字、图片和参考课件等素材的开放更方便教学使用。通过开源本书,我们期望能够使它得到更广泛的采用,得到更多的批评指正意见,使得它能够更快成熟。 6 | * 方便的版本管理系统有助于及时吸收对本书的勘误和改进。一方面,读者可以通过项目的问题管理系统或者其他渠道反馈问题,被接纳后会立即反映到在线的版本中,不必等待下一次改版印刷周期。另一方面,作者们也可以将之前对由于时间仓促未来得及完善的内容进行补充完善,或者根据产业的发展需求对内容进行适当调整。 7 | * 新的格式能够提供更丰富的表现形式。在rmarkdown文本的基础上,系统可以自动生成HTML、word和PDF等各种格式的发布版本,扩大适用范围。后续还可以利用其中某些格式来实现传统纸质书本无法做到的实时交互等功能。 当然,限于rmarkdown/bookdown目前的表现能力以及作者们对其的应用水平,在线版本生成的发布版本排版细节质量上很可能比不上出版社提供的、与纸质版本一致的原始电子文件,阅读体验上也不能替代纸质版本。有条件的读者仍然可以选择由机械工业出版社出版发行的纸质版本。 8 | 9 | 由于工具的限制,在线版本和纸质版本的版面效果并非完全一致。目前图表的编号也不一定一一对应,部分纸质书的表可能用图来代替。后续随着一些修订内容的添加,在线版本的文字和纸质版本也会有所差别。 10 | 11 | 本书内容的开源离不开出版社、龙芯中科技术股份有限公司和作者们的支持,在此表示感谢。出版社提供了精心排版后的电子版本和相应资源文件,并同意开放这些资源。龙芯中科技术股份有限公司提供了在线版本的一份web服务器资源以及部分经费支持。作者们接受了可能的出版收益损失。 12 | 13 | 本书电子版也得到了中国科学院大学研究生程轶涵、穆热迪力、徐淮、叶锦鹏、王铭剑(按姓氏拼音顺序)等同学的大力支持,他们协助完成了rmarkdown格式部分源代码的编辑和校对,在此表示感谢! 
14 | -------------------------------------------------------------------------------- /materials/chapter10/pi_MPI.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include “mpi.h” 3 | int main(int argc, char **argv){ 4 | int num_steps=1000000; 5 | double x,pi,step,sum,sumallprocs; 6 | int i,start, end,temp; 7 | int ID,num_procs;//进程编号及组中的进程数量, 进程编号的范围为0到num_procs-1 8 | MPI_Status status; 9 | //Initialize the MPI environment 10 | MPI_Init(&argc,&argv); 11 | MPI_Comm_rank(MPI_COMM_WORLD,&ID);// 12 | MPI_Comm_size(MPI_COMM_WORLD,&num_procs); 13 | //任务划分并计算 14 | step = 1.0/num_steps; 15 | start = ID *(num_steps/num_procs) ; 16 | 17 | if (ID == num_procs-1) 18 | end = num_steps; 19 | else 20 | end = start + num_steps/num_procs; 21 | 22 | for(i=start; i 2 | #include 3 | #include 4 | #define NUM_THREADS 4 //假设线程数目为4 5 | #define n 1000 6 | double *A,*B,*C; 7 | void *matrixMult(void *id) {//计算矩阵乘 8 | int my_id = (int ) id; 9 | int i,j,k,start,end; 10 | //计算进程负责的部分 11 | start = my_id*(n/NUM_THREADS); 12 | if(my_id == NUMTHREADS-1) 13 | end = n; 14 | else 15 | end = start+(n/NUM_THREADS); 16 | 17 | for(i=start;i 2 | #include 3 | #include 4 | #define NUM_THREADS 4 //假设线程数目为4 5 | int num_steps = 1000000; 6 | double step = 0.0, sum = 0.0; 7 | pthread_mutex_t mutex; 8 | 9 | void *countPI(void *id) { 10 | int index = (int ) id; 11 | int start = index*(num_steps/NUM_THREADS); 12 | int end; 13 | double x = 0.0, y = 0.0; 14 | if (index == NUM_THREADS-1) 15 | end = num_steps; 16 | else 17 | end = start+(num_steps/NUM_THREADS); 18 | 19 | for (int i=start; i] input_event+0x30/0xc8 2 | [ 1075.597626] [<9000000000ca3ee4>] hidinput_report_event+0x44/0x68 3 | [ 1075.597628] [<9000000000ca1e30>] hid_report_raw_event+0x230/0x470 4 | [ 1075.597631] [<9000000000ca21a4>] hid_input_report+0x134/0x1b0 5 | [ 1075.597632] [<9000000000cb07ac>] hid_irq_in+0x9c/0x280 6 | [ 1075.597634] [<9000000000be9cf0>] __usb_hcd_giveback_urb+0xa0/0x120 7 | [ 
1075.597636] [<9000000000c23a7c>] finish_urb+0xac/0x1c0 8 | [ 1075.597638] [<9000000000c24b50>] ohci_work.part.8+0x218/0x550 9 | [ 1075.597640] [<9000000000c27f98>] ohci_irq+0x108/0x320 10 | [ 1075.597642] [<9000000000be96e8>] usb_hcd_irq+0x28/0x40 11 | [ 1075.597644] [<9000000000296430>] __handle_irq_event_percpu+0x70/0x1b8 12 | [ 1075.597645] [<9000000000296598>] handle_irq_event_percpu+0x20/0x88 13 | [ 1075.597647] [<9000000000296644>] handle_irq_event+0x44/0xa8 14 | [ 1075.597648] [<900000000029abfc>] handle_level_irq+0xdc/0x188 15 | [ 1075.597651] [<90000000002952a4>] generic_handle_irq+0x24/0x40 16 | [ 1075.597652] [<900000000081dc50>] extioi_irq_dispatch+0x178/0x210 17 | [ 1075.597654] [<90000000002952a4>] generic_handle_irq+0x24/0x40 18 | [ 1075.597656] [<9000000000ee4eb8>] do_IRQ+0x18/0x28 19 | [ 1075.597658] [<9000000000203ffc>] except_vec_vi_end+0x94/0xb8 20 | -------------------------------------------------------------------------------- /materials/chapter12/SPEC2017.csv: -------------------------------------------------------------------------------- 1 | SPECrate 2017定点程序,SPECspeed 2017定点程序,语言,描述 2 | 500.perlbench_r,600.perlbench_s,C,文本处理,Perl解释器 3 | 502.gcc_r,602.gcc_s,C,编译,GNU C编译器 4 | 505.mcf_r,605.mcf_s,C,组合和优化,求解车辆调度问题 5 | 520.omnetpp_r,620.omnetpp_s,C++,计算机网络,离散事件模拟 6 | 523.xalancbmk_r,623.xalancbmk_s,C++,通过XSLT将XML转换为HTML 7 | 525.x264_r,625.x264_s,C,视频压缩,x264视频编解码 8 | 531.deepsjeng_r,631.deepsjeng_s,C++,人工智能,下棋程序,alpha-beta树搜索 9 | 541.leela_r,641.leela_s,C++,人工智能,下棋程序(go),蒙特卡洛树搜索 10 | 548.exchange2_r,648.exchange2_s,Fortran,人工智能,9x9数独,递归方式求解 11 | 557.xz_r,657.xz_s,C,压缩和解压缩,xz压缩程序 12 | SPECrate 2017浮点程序,SPECspeed 2017浮点程序,语言,描述 13 | 503.bwaves_r,603.bwaves_s,Fortran,计算流体动力学,爆炸建模 14 | 507.cactuBSSN_r,607.cactuBSSN_s,"C++, C, Fortran",物理,广义相对论和数值相对论,求解真空中的爱因斯坦方程 15 | 508.namd_r,,C++,结构生物学,模拟大规模的生物分子系统 16 | 510.parest_r,,C++,分子医学成像,光学层析成像问题的有限元求解器 17 | 511.povray_r,,"C++, C",计算机可视化,光线追踪应用POV-Ray 18 | 519.lbm_r,619.lbm_s,C,流体动力学 19 | 
521.wrf_r,621.wrf_s,"Fortran, C",天气预报建模,基于新一代中尺度数值天气预报系统WRF 20 | 526.blender_r,,"C++, C",三维渲染和动画,基于开源的三维制作套件Blender 21 | 527.cam4_r,627.cam4_s,"Fortran, C",大气环流建模,地球系统模型CESM中的大气建模部分 22 | ,628.pop2_s,"Fortran, C",大规模海洋建模(气候层面),地球系统模型CESM中的海洋建模部分 23 | 538.imagick_r,638.imagick_s,C,图像处理,图像处理软件包ImageMagick中convert部分 24 | 544.nab_r,644.nab_s,C,分子动力学,基于生命科学计算领域中分子建模应用NAB(核酸构建器) 25 | 549.fotonik3d_r,649.fotonik3d_s,Fortran,计算电磁学,利用时域有限差分方法计算光子波导的透射系数 26 | 554.roms_r,654.roms_s,Fortran,区域海洋建模,基于区域海洋建模系统ROMS 27 | -------------------------------------------------------------------------------- /materials/chapter12/2006brpred.csv: -------------------------------------------------------------------------------- 1 | SPEC CPU2006,loongson 3A5000,zen1 r3-1200,skylake i3 9100f,zen1/3A5000,skylake/3A5000 2 | 400.perlbench,1.18%,1.10%,0.71%,0.93,0.6 3 | 401.bzip2,5.38%,5.08%,4.94%,0.94,0.92 4 | 403.gcc,1.52%,1.31%,1.03%,0.86,0.68 5 | 429.mcf,5.62%,3.57%,3.86%,0.64,0.69 6 | 445.gobmk,8.29%,9.80%,8.56%,1.18,1.03 7 | 456.hmmer,1.12%,0.66%,0.65%,0.59,0.58 8 | 458.sjeng,4.47%,5.89%,4.17%,1.32,0.93 9 | 462.libquantum,0.79%,0.21%,0.09%,0.27,0.11 10 | 464.h264ref,2.09%,1.64%,1.69%,0.78,0.81 11 | 471.omnetpp,2.53%,1.65%,1.83%,0.65,0.72 12 | 473.astar,13.60%,12.03%,12.75%,0.88,0.94 13 | 483.xalancbmk,0.43%,0.51%,0.34%,1.19,0.79 14 | SPECint_2006,2.48%,1.97%,1.64%,0.79,0.66 15 | 410.bwaves,0.09%,0.25%,0.15%,2.78,1.67 16 | 416.gamess,0.94%,1.07%,0.69%,1.14,0.73 17 | 433.milc,6.58%,0.40%,0.23%,0.06,0.03 18 | 434.zeusmp,1.38%,0.95%,0.12%,0.69,0.09 19 | 435.gromacs,7.04%,6.19%,6.11%,0.88,0.87 20 | 436.cactusADM,0.33%,1.49%,0.17%,4.52,0.52 21 | 437.leslie3d,0.33%,1.66%,0.22%,5.03,0.67 22 | 444.namd,4.66%,4.38%,4.52%,0.94,0.97 23 | 447.dealII,2.48%,2.31%,2.06%,0.93,0.83 24 | 450.soplex,5.51%,4.22%,4.40%,0.77,0.8 25 | 453.povray,1.86%,1.30%,0.56%,0.7,0.3 26 | 454.calculix,3.23%,2.80%,3.02%,0.87,0.93 27 | 459.GemsFDTD,0.29%,0.40%,0.10%,1.38,0.34 28 | 465.tonto,1.20%,0.93%,0.91%,0.78,0.76 29 | 
470.lbm,0.46%,0.45%,0.38%,0.98,0.83 30 | 481.wrf,1.08%,0.51%,0.24%,0.47,0.22 31 | 482.sphinx3,2.35%,1.80%,1.88%,0.77,0.8 32 | SPECfp_2006,1.30%,1.24%,0.65%,0.95,0.5 33 | -------------------------------------------------------------------------------- /materials/chapter12/2006rate4.csv: -------------------------------------------------------------------------------- 1 | CPU2006 rate4,loongson 3A5000,Zen1 r3-1200,skylake i3 9100f,zen1/3A5000,skylake/3A5000 2 | 400.perlbench,102,125,136,122.50%,133.30% 3 | 401.bzip2,64.4,69.6,67.7,108.10%,105.10% 4 | 403.gcc,75.5,107,121,141.70%,160.30% 5 | 429.mcf,50.5,81.3,88.3,161.00%,174.90% 6 | 445.gobmk,95.8,79.7,85.4,83.20%,89.10% 7 | 456.hmmer,128,184,214,143.80%,167.20% 8 | 458.sjeng,86,71.3,89.2,82.90%,103.70% 9 | 462.libquantum,89.2,154,121,172.60%,135.70% 10 | 464.h264ref,147,178,201,121.10%,136.70% 11 | 471.omnetpp,45,54.8,60,121.80%,133.30% 12 | 473.astar,58.6,57.1,55.9,97.40%,95.40% 13 | 483.xalancbmk,68.6,90.2,111,131.50%,161.80% 14 | SPECint2006,79.39,95.99,103.16,120.90%,129.90% 15 | 410.bwaves,82.1,210,152,255.80%,185.10% 16 | 416.gamess,84.8,114,119,134.40%,140.30% 17 | 433.milc,44.6,103,83.2,230.90%,186.50% 18 | 434.zeusmp,83.6,179,190,214.10%,227.30% 19 | 435.gromacs,58.3,103,92.2,176.70%,158.10% 20 | 436.cactusADM,114,193,256,169.30%,224.60% 21 | 437.leslie3d,68.3,114,96,166.90%,140.60% 22 | 444.namd,80,85.8,94.7,107.30%,118.40% 23 | 447.dealII,142,187,176,131.70%,123.90% 24 | 450.soplex,62.9,99,101,157.40%,160.60% 25 | 453.povray,149,136,155,91.30%,104.00% 26 | 454.calculix,62,116,118,187.10%,190.30% 27 | 459.GemsFDTD,53.4,98.2,80.7,183.90%,151.10% 28 | 465.tonto,106,98.7,136,93.10%,128.30% 29 | 470.lbm,51.1,99.4,87.3,194.50%,170.80% 30 | 481.wrf,92.1,175,164,190.00%,178.10% 31 | 482.sphinx3,81.3,125,139,153.80%,171.00% 32 | SPECfp2006,78.65,126.28,124.86,160.60%,158.80% 33 | -------------------------------------------------------------------------------- /materials/chapter12/2006ipc.csv: 
-------------------------------------------------------------------------------- 1 | SPEC CPU2006,loongson 3A5000,zen1 r3-1200,skylake i3 9100f,zen1/3A5000,skylake/3A5000 2 | 400.perlbench,2.54,2.45,2.72,96.50%,107.10% 3 | 401.bzip2,1.86,1.73,1.68,93.00%,90.30% 4 | 403.gcc,1.1,1.1,1.22,100.00%,110.90% 5 | 429.mcf,0.32,0.33,0.45,103.10%,140.60% 6 | 445.gobmk,1.42,1.05,1.12,73.90%,78.90% 7 | 456.hmmer,2.97,2.57,2.68,86.50%,90.20% 8 | 458.sjeng,1.81,1.33,1.68,73.50%,92.80% 9 | 462.libquantum,1.42,1.67,1.88,117.60%,132.40% 10 | 464.h264ref,2.69,2.5,2.38,92.90%,88.50% 11 | 471.omnetpp,0.54,0.57,0.73,105.60%,135.20% 12 | 473.astar,0.89,0.79,0.77,88.80%,86.50% 13 | 483.xalancbmk,1.4,1.3,1.84,92.90%,131.40% 14 | SPECint_rate2006,1.34,1.24,1.4,92.90%,105.00% 15 | 410.bwaves,1.42,1.9,1.41,133.80%,99.30% 16 | 416.gamess,2.97,2.82,2.92,94.90%,98.30% 17 | 433.milc,0.4,0.97,0.81,242.50%,202.50% 18 | 434.zeusmp,1.39,2.01,1.68,144.60%,120.90% 19 | 435.gromacs,1.31,2.32,2.13,177.10%,162.60% 20 | 436.cactusADM,1.79,1.72,1.44,96.10%,80.40% 21 | 437.leslie3d,0.91,1.38,0.93,151.60%,102.20% 22 | 444.namd,1.66,1.75,1.94,105.40%,116.90% 23 | 447.dealII,1.52,1.79,1.7,117.80%,111.80% 24 | 450.soplex,0.84,0.94,1.02,111.90%,121.40% 25 | 453.povray,2.06,2.12,2.6,102.90%,126.20% 26 | 454.calculix,1.66,2.3,2.16,138.60%,130.10% 27 | 459.GemsFDTD,0.88,1.27,0.82,144.30%,93.20% 28 | 465.tonto,2.33,2.27,2.53,97.40%,108.60% 29 | 470.lbm,0.74,1.62,1.79,218.90%,241.90% 30 | 481.wrf,1.67,1.95,1.73,116.80%,103.60% 31 | 482.sphinx3,1.52,1.55,1.85,102.00%,121.70% 32 | SPECfp_rate2006,1.34,1.73,1.62,129.70%,121.00% 33 | -------------------------------------------------------------------------------- /materials/chapter12/2006speed.csv: -------------------------------------------------------------------------------- 1 | SPEC CPU2006,loongson 3A5000,zen1 r3-1200,skylake i3 9100f,zen1/3A5000,skylake/3A5000 2 | 400.perlbench,29.1,31.4,35.3,107.90%,121.30% 3 | 401.bzip2,17.3,18.5,19.2,106.90%,111.00% 4 | 
403.gcc,23.9,30.8,38.1,128.90%,159.40% 5 | 429.mcf,27.1,27.6,37,101.80%,136.50% 6 | 445.gobmk,25.6,20.1,21.7,78.50%,84.80% 7 | 456.hmmer,39.2,46.2,54.7,117.90%,139.50% 8 | 458.sjeng,22.4,17.8,22.5,79.50%,100.40% 9 | 462.libquantum,78.8,141,123,178.90%,156.10% 10 | 464.h264ref,38,45.1,51,118.70%,134.20% 11 | 471.omnetpp,18.1,17.2,21.2,95.00%,117.10% 12 | 473.astar,19.3,15.6,15.8,80.80%,81.90% 13 | 483.xalancbmk,28.7,25.9,36.9,90.20%,128.60% 14 | SPECint2006,27.87,29.05,33.47,104.20%,120.10% 15 | 410.bwaves,54.4,99.1,85.2,182.20%,156.60% 16 | 416.gamess,22.9,28.4,30.2,124.00%,131.90% 17 | 433.milc,18,38.8,32.3,215.60%,179.40% 18 | 434.zeusmp,25,51,56.2,204.00%,224.80% 19 | 435.gromacs,15.3,25.8,23.5,168.60%,153.60% 20 | 436.cactusADM,84.8,135,228,159.20%,268.90% 21 | 437.leslie3d,36.2,48.2,62.8,133.10%,173.50% 22 | 444.namd,20.7,21.5,23.8,103.90%,115.00% 23 | 447.dealII,39.8,48.7,46.7,122.40%,117.30% 24 | 450.soplex,28.5,34.6,39.8,121.40%,139.60% 25 | 453.povray,39.1,34.2,41.5,87.50%,106.10% 26 | 454.calculix,17.6,29.2,30.1,165.90%,171.00% 27 | 459.GemsFDTD,35.2,65.1,56.7,184.90%,161.10% 28 | 465.tonto,28.4,25.2,37.2,88.70%,131.00% 29 | 470.lbm,28.8,67.5,74.6,234.40%,259.00% 30 | 481.wrf,36.5,53.9,65,147.70%,178.10% 31 | 482.sphinx3,34.1,40.6,48.4,119.10%,141.90% 32 | SPECfp2006,30.29,43.85,48.34,144.80%,159.60% 33 | -------------------------------------------------------------------------------- /materials/chapter12/2006speed-nopara.csv: -------------------------------------------------------------------------------- 1 | SPEC CPU2006,loongson 3A5000,zen1 r3-1200,skylake i3 9100f,zen1/3A5000,skylake/3A5000 2 | 400.perlbench,28.9,31.2,35.2,108.00%,121.80% 3 | 401.bzip2,17.2,18.7,19.2,108.70%,111.60% 4 | 403.gcc,23.8,30.8,38.4,129.40%,161.30% 5 | 429.mcf,26.9,27.5,36.3,102.20%,134.90% 6 | 445.gobmk,25.5,20,21.6,78.40%,84.70% 7 | 456.hmmer,39.2,46.2,54.3,117.90%,138.50% 8 | 458.sjeng,22.2,17.8,22.5,80.20%,101.40% 9 | 462.libquantum,54.6,63.6,72,116.50%,131.90% 10 | 
464.h264ref,37.9,45.1,51,119.00%,134.60% 11 | 471.omnetpp,18,17.2,21,95.60%,116.70% 12 | 473.astar,19.1,15.4,15.8,80.60%,82.70% 13 | 483.xalancbmk,28.1,26,36.8,92.50%,131.00% 14 | SPECint2006,26.86,27.15,31.91,101.10%,118.80% 15 | 410.bwaves,54.8,98.9,85.4,180.50%,155.80% 16 | 416.gamess,22.2,28.6,30.2,128.80%,136.00% 17 | 433.milc,17.9,38.4,32.4,214.50%,181.00% 18 | 434.zeusmp,24.6,51,56.3,207.30%,228.90% 19 | 435.gromacs,15.2,25.8,23.2,169.70%,152.60% 20 | 436.cactusADM,52,58.6,93.1,112.70%,179.00% 21 | 437.leslie3d,36.2,48.3,62.3,133.40%,172.10% 22 | 444.namd,20.6,21.5,23.8,104.40%,115.50% 23 | 447.dealII,39.7,48.6,46.7,122.40%,117.60% 24 | 450.soplex,28.6,34.7,39.4,121.30%,137.80% 25 | 453.povray,38.1,34.1,40.9,89.50%,107.30% 26 | 454.calculix,17.6,29.3,30.2,166.50%,171.60% 27 | 459.GemsFDTD,29.8,40.2,45.7,134.90%,153.40% 28 | 465.tonto,28.1,25.2,37.1,89.70%,132.00% 29 | 470.lbm,28.5,66.8,74.8,234.40%,262.50% 30 | 481.wrf,32.1,56.8,64.9,176.90%,202.20% 31 | 482.sphinx3,33.5,40.8,48.6,121.80%,145.10% 32 | SPECfp2006,28.72,40.69,45.2,141.70%,157.30% 33 | -------------------------------------------------------------------------------- /materials/chapter5/alu.csv: -------------------------------------------------------------------------------- 1 | 处理器,寄存器,运算部件 2 | Alpha 21264,"Int regfile (80 4r6w) 3 | FP regfile (72 4r4w)","arith./logic unit; shift unit; 4 | mult unit; add/logic unit; 5 | shift unit; MVI/PLZ unit; 6 | arith/logic; arith/logic unit; 7 | FP add unit; FP mult unit; 8 | FP div/sqrt unit" 9 | MIPS R10000,"Int regfile (64 7r3w) 10 | FP regfile (64 5r3w)","arith./logic unit; shift unit; 11 | arith./logic unit; mult/div unit; 12 | FP add/sub unit; 13 | FP compare/conversion unit; 14 | FP mult unit; FP div/sqrt unit" 15 | HP PA8700,"Int archregfile (32 8r4w) 16 | Int renregfile (56 9r4w) 17 | FP archregfile (32 8r4w) 18 | FP renregfile (56 9r4w)","2 arith/logic units; 19 | 2 shift merge units; 20 | 2 FP MAC units; 21 | 2 FP div/sqrt units" 22 | Ultra Sparc 
III,"Int regfile (144 7r3w) 23 | FP regfile (32 5r4w)","2 arith units; logic unit; shift unit; 24 | FP adder unit; graphic unit; 25 | FP div/sqrt unit; FP mult unit; graphic unit" 26 | Power4,"GPRS (80) 27 | FPRS (72)","2 fixed-point units; 28 | 2 floating-point units" 29 | Zen,"Int regfile (168) 30 | FP regfile (160)","4 floating-point/vector units; 31 | 4 fixed-point units" 32 | Skylake,"Int regfile (180) 33 | FP regfile (168)","4 fixed-point units; 34 | 3 floating-point/vector units" 35 | Power8,"GPRS (2×124) 36 | VSRS (2×144)"," 2 fixed-point units; 37 | 4 floating-point units; 38 | 2 vector units; 39 | decimal floating-point unit; 40 | crypto unit" 41 | 龙芯3A5000,"Int regfile (128 12r8w) 42 | FP regfile (128 8r6w)","4 fixed-point units; 43 | 2 floating-point/vector units" 44 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | from ubuntu:20.04 2 | 3 | # select faster mirror 4 | RUN sed -i -e 's/archive.ubuntu.com/mirrors.tuna.tsinghua.edu.cn/' /etc/apt/sources.list 5 | 6 | RUN DEBIAN_FRONTEND=noninteractive apt-get update 7 | 8 | # install necessary packages 9 | RUN DEBIAN_FRONTEND=noninteractive apt-get install -y git r-base-core vim 10 | 11 | # they are needed to build r packages via renv::restore() 12 | RUN DEBIAN_FRONTEND=noninteractive apt-get install -y libxml2-dev \ 13 | libfontconfig1-dev libfreetype-dev libcairo2-dev 14 | RUN Rscript -e "install.packages('renv')" 15 | 16 | # use renv to recover r environment 17 | RUN cd /opt && git clone --depth 1 https://github.com/foxsen/archbase && cd archbase 18 | RUN cd /opt/archbase && Rscript -e "renv::restore()" 19 | 20 | # install tinytex environment for make pdf 21 | RUN Rscript -e "renv::install('tinytex')" 22 | RUN Rscript -e "tinytex::install_tinytex()" 23 | 24 | # install Chinese fonts, flextable need used fonts to calculate table width 25 | RUN 
DEBIAN_FRONTEND=noninteractive apt-get install -y fonts-noto-cjk 26 | 27 | # set Chinese locale 28 | RUN apt-get install -y locales 29 | RUN sed -i -e 's/# zh_CN.UTF-8 UTF-8/zh_CN.UTF-8 UTF-8/' /etc/locale.gen && \ 30 | locale-gen 31 | ENV LC_ALL zh_CN.UTF-8 32 | 33 | # install pandoc 2.11 34 | RUN DEBIAN_FRONTEND=noninteractive apt-get install -y wget 35 | RUN wget -c https://github.com/jgm/pandoc/releases/download/2.11.4/pandoc-2.11.4-1-amd64.deb && \ 36 | dpkg -i ./pandoc-2.11.4-1-amd64.deb && \ 37 | rm -f ./pandoc-2.11.4-1-amd64.deb 38 | 39 | RUN export PATH=$PATH:/root/bin && cd /opt/archbase && make pdf 40 | 41 | WORKDIR /opt/archbase 42 | ENV PATH="$PATH:/root/bin" 43 | -------------------------------------------------------------------------------- /materials/chapter12/2006brbandwidth.csv: -------------------------------------------------------------------------------- 1 | SPEC CPU2006,loongson 3A5000,zen1 r3-1200,skylake i3 9100f,zen1/3A5000,skylake/3A5000 2 | 400.perlbench,1285.22,1330.7,1507.39,103.50%,117.30% 3 | 401.bzip2,687.2,679.21,700.24,98.80%,101.90% 4 | 403.gcc,528.81,625.04,726.01,118.20%,137.30% 5 | 429.mcf,259.12,186.98,251.16,72.20%,96.90% 6 | 445.gobmk,666.08,495.13,533.88,74.30%,80.20% 7 | 456.hmmer,561.53,553.7,645.47,98.60%,114.90% 8 | 458.sjeng,896.9,662.04,840.26,73.80%,93.70% 9 | 462.libquantum,1109.34,1225.5,1380.07,110.50%,124.40% 10 | 464.h264ref,370.65,468.73,483.39,126.50%,130.40% 11 | 471.omnetpp,333.52,356.34,442.49,106.80%,132.70% 12 | 473.astar,512.46,354.04,349.51,69.10%,68.20% 13 | 483.xalancbmk,1045.16,963.92,1362.81,92.20%,130.40% 14 | SPECint_2006,615.94,576.2,667.59,93.50%,108.40% 15 | 410.bwaves,106.04,112.99,94.37,106.60%,89.00% 16 | 416.gamess,489.11,592.77,616.36,121.20%,126.00% 17 | 433.milc,11.81,48.688,43.09,412.30%,364.90% 18 | 434.zeusmp,171.28,101.7,88.22,59.40%,51.50% 19 | 435.gromacs,130.19,174.94,158.7,134.40%,121.90% 20 | 436.cactusADM,7.02,12.711,13.05,181.10%,185.90% 21 | 
437.leslie3d,134.2,300.84,208.51,224.20%,155.40% 22 | 444.namd,239.86,234.96,260.51,98.00%,108.60% 23 | 447.dealII,659.59,854.76,830.83,129.60%,126.00% 24 | 450.soplex,386.41,462.08,514.6,119.60%,133.20% 25 | 453.povray,738.4,889.04,1060.83,120.40%,143.70% 26 | 454.calculix,233.14,351.24,352.48,150.70%,151.20% 27 | 459.GemsFDTD,70.57,136.25,111.24,193.10%,157.60% 28 | 465.tonto,409.28,426.25,513.51,104.10%,125.50% 29 | 470.lbm,24.27,57.785,63.32,238.10%,260.90% 30 | 481.wrf,323.44,460.5,504.5,142.40%,156.00% 31 | 482.sphinx3,420.3,432.66,546.2,102.90%,130.00% 32 | SPECfp_2006,152.4,214.47,213.45,140.70%,140.10% 33 | -------------------------------------------------------------------------------- /materials/chapter3/csr.csv: -------------------------------------------------------------------------------- 1 | 助记符,编号,说明 2 | CRMD,0x0,处理器当前运行模式及地址翻译模式、全局中断使能等配置信息 3 | PRMD,0x1,触发当前普通异常的现场的运行模式及全局中断使能等配置信息 4 | EUEN,0x2,扩展部件的使能控制 5 | MISC,0x3,各权限等级下是否允许使用部分特权指令等杂项配置 6 | ECFG,0x4,局部中断使能、异常入口间距等配置信息 7 | ESTAT,0x5,记录异常和中断发生原因 8 | ERA,0x6,普通异常处理返回地址 9 | BADV,0x7,记录触发地址相关异常的访存虚地址 10 | BADI,0x8,记录触发异常指令的指令编码 11 | EENTRY,0xC,配置普通异常处理程序入口地址 12 | TLBIDX,0x10,存储管理(TLB)相关寄存器,将在第3节进行详细介绍 13 | TLBEHI,0x11,存储管理(TLB)相关寄存器,将在第3节进行详细介绍 14 | TLBELO0,0x12,存储管理(TLB)相关寄存器,将在第3节进行详细介绍 15 | TLBELO1,0x13,存储管理(TLB)相关寄存器,将在第3节进行详细介绍 16 | ASID,0x18,存储管理(TLB)相关寄存器,将在第3节进行详细介绍 17 | STLBPS,0x1E,存储管理(TLB)相关寄存器,将在第3节进行详细介绍 18 | PGDL,0x19,存储管理(TLB)相关寄存器,将在第3节进行详细介绍 19 | PGDH,0x1A,存储管理(TLB)相关寄存器,将在第3节进行详细介绍 20 | PGD,0x1B,存储管理(TLB)相关寄存器,将在第3节进行详细介绍 21 | PWCL,0x1C,存储管理(TLB)相关寄存器,将在第3节进行详细介绍 22 | PWCH,0x1D,存储管理(TLB)相关寄存器,将在第3节进行详细介绍 23 | SAVEn,0x30+n,保存临时数据 24 | TID,0x40,恒定频率计时器和定时器相关寄存器 25 | TCFG,0x41,恒定频率计时器和定时器相关寄存器 26 | TVAL,0x42,恒定频率计时器和定时器相关寄存器 27 | CNTC,0x43,恒定频率计时器和定时器相关寄存器 28 | TICLR,0x44,恒定频率计时器和定时器相关寄存器 29 | LLBCTL,0x60,LLBit的控制 30 | TLBRENTRY,0x88,TLB重填异常处理专用寄存器 31 | TLBRBADV,0x89,TLB重填异常处理专用寄存器 32 | TLBRERA,0x8A,TLB重填异常处理专用寄存器 33 | TLBRSAVE,0x8B,TLB重填异常处理专用寄存器 34 | TLBRELO0,0x8C,TLB重填异常处理专用寄存器 35 
| TLBRELO1,0x8D,TLB重填异常处理专用寄存器 36 | TLBREHI,0x8E,TLB重填异常处理专用寄存器 37 | TLBRPRMD,0x8F,TLB重填异常处理专用寄存器 38 | MERRCTL,0x90,由Cache校验错所引发的机器错误异常的相关控制状态寄存器 39 | MERRINFO1,0x91,由Cache校验错所引发的机器错误异常的相关控制状态寄存器 40 | MERRINFO2,0x92,由Cache校验错所引发的机器错误异常的相关控制状态寄存器 41 | MERRENTRY,0x93,由Cache校验错所引发的机器错误异常的相关控制状态寄存器 42 | MERRERA,0x94,由Cache校验错所引发的机器错误异常的相关控制状态寄存器 43 | MERRSAVE,0x95,由Cache校验错所引发的机器错误异常的相关控制状态寄存器 44 | DMW0~DMW3,0x180~0x183,直接映射配置窗口0~3的配置寄存器 45 | DBG,0x500,调试相关的控制状态寄存器 46 | DERA,0x501,调试相关的控制状态寄存器 47 | DSAVE,0x502,调试相关的控制状态寄存器 48 | -------------------------------------------------------------------------------- /02-foreword-author.Rmd: -------------------------------------------------------------------------------- 1 | # 自序 {-} 2 | \markboth{自序}{自序} 3 | 4 | 计算机专业有几门“当家”的核心课程是关于“如何造计算机”的,硬件方面以计算机组成原理和计算机体系结构为主,软件方面以操作系统和编译原理为主。其他如离散数学、编程语言、数据结构、数字逻辑等计算机专业的学科基础课也很重要,除了计算机专业,其他使用计算机的专业如自动化专业、电子专业也在学。 5 | 6 | 我从2001年就开始从事龙芯处理器的研发,并从2005年起在中国科学院大学教授计算机体系结构课程,其间接触了很多从各高校计算机专业毕业的学生,发现他们在大学时主要练就了诸如编程等“怎么用计算机”的本领,对操作系统和体系结构这种“如何造计算机”的课程,或者没有系统学习,或者只学到一些概念。比如对于“从打开电源到计算机启动再到登录界面”或者“从按一下空格键到翻一页PPT”这样的过程,如果问及计算机系统内部包括CPU、南北桥、GPU在内的硬件以及包括操作系统和应用程序在内的软件是如何协同工作的,计算机专业毕业的学生几乎没有人说得明白。 7 | 8 | 我1986年到中国科学技术大学计算机系学习的时候,教授我计算机体系结构课程的老师都是亲自造过计算机的,他们能够讲明白计算机软硬件工作的原理性过程。改革开放以来,我国主要使用国外的CPU和操作系统“攒”计算机,学术界也几乎不从事CPU和操作系统这种核心技术的研究工作,全国两千多个计算机专业主要使用国外教材或者翻译的国外教材教授学生“如何造计算机”。由于计算机体系结构和操作系统都是工程性很强的学科,而任课老师却没有机会参与设计CPU和操作系统,因此教学生的时候难免照本宣科,使学生只学到一些概念,难以对计算机的软硬件工作过程融会贯通。 9 | 10 | 发展以CPU和操作系统为代表的自主基础软硬件,是国家的战略需求,而人才培养是满足该战略需求的必要条件。因此,自2005年开始,我便结合龙芯CPU的实践在中国科学院研究生院开设计算机体系结构课程,并于2011年依托清华大学出版社出版了《计算机体系结构》教材。2014年,中国科学院大学设立并开始招收本科生,要求我也给本科生讲授计算机体系结构课程。刚开始觉得难度很大,因为计算机体系结构非常复杂,给研究生讲清楚都不容易,给本科生讲清楚就更难。 11 | 12 | 经过反复思考,我觉得可以利用这个机会,建设包括本科生、硕士生、博士生在内的计算机体系结构课程体系,由浅入深地培养“造计算机”的人才。为此,我们计划编写一套分别面向本科生、硕士生、博士生的“计算机体系结构”课程教材。 13 | 14 | 面向本科生的教材为《计算机体系结构基础》。主要内容包括:作为软硬件界面的指令系统结构,包含CPU、GPU、南北桥协同的计算机硬件结构,CPU的微结构,并行处理结构,计算机性能分析等。上述面面俱到的课程安排主要是考虑到体系结构学科的完整性,但重点是软硬件界面及计算机硬件结构,微结构则是硕士课程的主要内容。 15 | 16 | 
面向硕士生的教材为《计算机体系结构》。主要介绍CPU的微结构,包括指令系统结构、二进制和逻辑电路、静态流水线、动态流水线、多发射流水线、运算部件、转移猜测、高速缓存、TLB、多核对流水线的影响等内容。 17 | 18 | 面向博士生的教材为《高级计算机体系结构》。中科院计算所的“高级计算机体系结构”课程是博士生精品课程的一部分,主要强调实践性,使学生通过设计真实的(而不是简化的)CPU,运行真实的(而不是简化的)操作系统,对结构设计、物理设计、操作系统软件做到融会贯通。 19 | 20 | 在此基础上,还将推出计算机体系结构实验平台和实验教材。 21 | 22 | 这套教材的编写突出以下特点:一是系统性,体系是“系统的系统”,很难脱离软硬件环境纯粹就体系结构本身讲解计算机体系结构,需要对体系结构、基础软件、电路和器件融会贯通;二是基础性,计算机体系结构千变万化,但几十年发展沉淀下来的原理性的东西不多,希望从体系结构快速发展的很多现象中找出一些内在的、本质的东西;三是实践性,计算机体系结构是实践性很强的学科,要设计在“硅”上运行而不是在“纸”上运行的体系结构。 23 | 24 | 胡伟武 25 | 26 | \newpage 27 | 28 | -------------------------------------------------------------------------------- /materials/chapter10/martix_MPI.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "mpi.h" 3 | #define n 1000 4 | int main(int argc, char **argv) 5 | { 6 | double *A,*B,*C; 7 | int i,j,k; 8 | int ID,num_procs,line; 9 | MPI_Status status; 10 | 11 | MPI_Init(&argc,&argv); //Initialize the MPI environment 12 | MPI_Comm_rank(MPI_COMM_WORLD,&ID);//获取当前进程号 13 | MPI_Comm_size(MPI_COMM_WORLD,&num_procs);//获取进程数目 14 | 15 | //分配数据空间 16 | A = (double *)malloc(sizeof(double)*n*n); 17 | B = (double *)malloc(sizeof(double)*n*n); 18 | C = (double *)malloc(sizeof(double)*n*n); 19 | line = n/num_procs;//按进程数来划分数据 20 | 21 | if(ID==0){ //节点0,主进程 22 | //初始化数组 23 | for(i=0;i% autofit() 63 | 64 | ft_out <- width(ft_out, width = dim(ft_out)$widths*pgwidth /(flextable_dim(ft_out)$widths)) 65 | return(ft_out) 66 | } 67 | 68 | ``` 69 | \newpage{} 70 | 71 | # 丛书序言 {-} 72 | 73 | \markboth{丛书序言}{丛书序言} 74 | 75 | 人工智能、 大数据、 云计算、 物联网、 移动互联网以及区块链等新一代信息技术及其融合 76 | 发展是当代智能科技的主要体现, 并形成智能时代在当前以及未来一个时期的鲜明技术特征。 77 | 智能时代来临之际, 面对全球范围内以智能科技为代表的新技术革命, 高等教育也处于重要的 78 | 变革时期。 目前, 全世界高等教育的改革正呈现出结构的多样化、 课程内容的综合化、 教育模 79 | 式的学研产一体化、 教育协作的国际化以及教育的终身化等趋势。 在这些背景下, 计算机专业 80 | 教育面临着重要的挑战与变化, 以新型计算技术为核心并快速发展的智能科技正在引发我国计 81 | 算机专业教育的变革。 82 | 83 | 计算机专业教育既要凝练计算技术发展中的 “ 不变要素” , 也要更好地体现时代变化引发 84 | 的教育内容的更新; 既要突出计算机科学与技术专业的核心地位与基础作用, 也需兼顾新设专 85 | 业对专业知识结构所带来的影响。 
适应智能时代需求的计算机类高素质人才, 除了应具备科学 86 | 思维、 创新素养、 敏锐感知、 协同意识、 终身学习和持续发展等综合素养与能力外, 还应具有 87 | 深厚的数理理论基础、 扎实的计算思维与系统思维、 新型计算系统创新设计以及智能应用系统 88 | 综合研发等专业素养和能力。 89 | 90 | 智能时代计算机类专业教育计算机类专业系统能力培养 2.0 研究组在分析计算机科学技术 91 | 及其应用发展特征、 创新人才素养与能力需求的基础上, 重构和优化了计算机类专业在数理基 92 | 础、 计算平台、 算法与软件以及应用共性各层面的知识结构, 形成了计算与系统思维、 新型系 93 | 统设计创新实践等能力体系, 并将所提出的智能时代计算机类人才专业素养及综合能力培养融 94 | 于专业教育的各个环节之中, 构建了适应时代的计算机类专业教育主流模式。 95 | 96 | 自 2008 年开始, 教育部计算机类专业教学指导委员会就组织专家组开展计算机系统能力 97 | 培养的研究、 实践和推广, 以注重计算系统硬件与软件有机融合、 强化系统设计与优化能力为 98 | 主体, 取得了很好的成效。 2018 年以来, 为了适应智能时代计算机教育的重要变化, 计算机 99 | 类专业教学指导委员会及时扩充了专家组成员, 继续实施和深化智能时代计算机类专业教育的 100 | 研究与实践工作, 并基于这些工作形成计算机类专业系统能力培养 2.0。 101 | 102 | 本系列教材就是依据智能时代计算机类专业教育研究结果而组织编写并出版的。 其中的教 103 | 材在智能时代计算机专业教育研究组起草的指导大纲框架下, 形成不同风格, 各有重点与侧 104 | 重。 其中多数将在已有优秀教材的基础上, 依据智能时代计算机类专业教育改革与发展需求, 105 | 优化结构、 重组知识, 既注重不变要素凝练, 又体现内容适时更新; 有的对现有计算机专业知 106 | 识结构依据智能时代发展需求进行有机组合与重新构建; 有的打破已有教材内容格局, 支持更 107 | 为科学合理的知识单元与知识点群, 方便在有效教学时间范围内实施高效的教学; 有的依据新 108 | 型计算理论与技术或新型领域应用发展而新编, 注重新型计算模型的变化, 体现新型系统结 109 | 构, 强化新型软件开发方法, 反映新型应用形态。 110 | 111 | 本系列教材在编写与出版过程中, 十分关注计算机专业教育与新一代信息技术应用的深度 112 | 融合, 将实施教材出版与 MOOC 模式的深度结合、 教学内容与新型试验平台的有机结合, 以及 113 | 教学效果评价与智能教育发展的紧密结合。 114 | 115 | 本系列教材的出版, 将支撑和服务智能时代我国计算机类专业教育, 期望得到广大计算机 116 | 教育界同人的关注与支持, 恳请提出建议与意见。 期望我国广大计算机教育界同人同心协力, 117 | 努力培养适应智能时代的高素质创新人才, 以推动我国智能科技的发展以及相关领域的综合应 118 | 用, 为实现教育强国和国家发展目标做出贡献。 119 | 120 |

    121 | 智能时代计算机类专业教育计算机类专业系统能力培养 2.0 研究组 122 |
    123 | 2020 年 1 月 124 |

    125 | -------------------------------------------------------------------------------- /chinese-gb7714-2005-numeric.csl: -------------------------------------------------------------------------------- 1 | 2 | 214 | -------------------------------------------------------------------------------- /latex/template.tex: -------------------------------------------------------------------------------- 1 | \documentclass[$if(fontsize)$$fontsize$,$endif$$if(lang)$$babel-lang$,$endif$$if(papersize)$$papersize$paper,$endif$$for(classoption)$$classoption$$sep$,$endfor$]{$documentclass$} 2 | $if(beamerarticle)$ 3 | \usepackage{beamerarticle} % needs to be loaded first 4 | $endif$ 5 | $if(fontfamily)$ 6 | \usepackage[$for(fontfamilyoptions)$$fontfamilyoptions$$sep$,$endfor$]{$fontfamily$} 7 | $else$ 8 | \usepackage{lmodern} 9 | $endif$ 10 | $if(linestretch)$ 11 | \usepackage{setspace} 12 | \setstretch{$linestretch$} 13 | $endif$ 14 | \usepackage{amssymb,amsmath} 15 | \usepackage{ifxetex,ifluatex} 16 | \usepackage{fixltx2e} % provides \textsubscript 17 | \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex 18 | \usepackage[$if(fontenc)$$fontenc$$else$T1$endif$]{fontenc} 19 | \usepackage[utf8]{inputenc} 20 | $if(euro)$ 21 | \usepackage{eurosym} 22 | $endif$ 23 | \else % if luatex or xelatex 24 | \ifxetex 25 | \usepackage{xltxtra,xunicode} 26 | \else 27 | \usepackage{fontspec} 28 | \fi 29 | \defaultfontfeatures{Ligatures=TeX,Scale=MatchLowercase} 30 | $for(fontfamilies)$ 31 | \newfontfamily{$fontfamilies.name$}[$fontfamilies.options$]{$fontfamilies.font$} 32 | $endfor$ 33 | $if(euro)$ 34 | \newcommand{\euro}{€} 35 | $endif$ 36 | $if(mainfont)$ 37 | \setmainfont[$for(mainfontoptions)$$mainfontoptions$$sep$,$endfor$]{$mainfont$} 38 | $endif$ 39 | $if(sansfont)$ 40 | \setsansfont[$for(sansfontoptions)$$sansfontoptions$$sep$,$endfor$]{$sansfont$} 41 | $endif$ 42 | $if(monofont)$ 43 | 
\setmonofont[Mapping=tex-ansi$if(monofontoptions)$,$for(monofontoptions)$$monofontoptions$$sep$,$endfor$$endif$]{$monofont$} 44 | $endif$ 45 | $if(CJKmainfont)$ 46 | \usepackage{xeCJK} 47 | \setCJKmainfont[$for(CJKoptions)$$CJKoptions$$sep$,$endfor$]{$CJKmainfont$} 48 | $endif$ 49 | \fi 50 | % use upquote if available, for straight quotes in verbatim environments 51 | \IfFileExists{upquote.sty}{\usepackage{upquote}}{} 52 | % use microtype if available 53 | \IfFileExists{microtype.sty}{% 54 | \usepackage{microtype} 55 | \UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts 56 | }{} 57 | $if(geometry)$ 58 | \usepackage[$for(geometry)$$geometry$$sep$,$endfor$]{geometry} 59 | $endif$ 60 | \usepackage[unicode=true]{hyperref} 61 | $if(colorlinks)$ 62 | \PassOptionsToPackage{usenames,dvipsnames}{color} % color is loaded by hyperref 63 | $endif$ 64 | \hypersetup{ 65 | $if(title-meta)$ 66 | pdftitle={$title-meta$}, 67 | $endif$ 68 | $if(author-meta)$ 69 | pdfauthor={$author-meta$}, 70 | $endif$ 71 | $if(keywords)$ 72 | pdfkeywords={$for(keywords)$$keywords$$sep$, $endfor$}, 73 | $endif$ 74 | $if(colorlinks)$ 75 | colorlinks=true, 76 | linkcolor=$if(linkcolor)$$linkcolor$$else$Maroon$endif$, 77 | citecolor=$if(citecolor)$$citecolor$$else$Blue$endif$, 78 | urlcolor=$if(urlcolor)$$urlcolor$$else$Blue$endif$, 79 | $else$ 80 | pdfborder={0 0 0}, 81 | $endif$ 82 | breaklinks=true} 83 | \urlstyle{same} % don't use monospace font for urls 84 | $if(lang)$ 85 | \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex 86 | \usepackage[shorthands=off,$for(babel-otherlangs)$$babel-otherlangs$,$endfor$main=$babel-lang$]{babel} 87 | $if(babel-newcommands)$ 88 | $babel-newcommands$ 89 | $endif$ 90 | \else 91 | \usepackage{polyglossia} 92 | \setmainlanguage[$polyglossia-lang.options$]{$polyglossia-lang.name$} 93 | $for(polyglossia-otherlangs)$ 94 | \setotherlanguage[$polyglossia-otherlangs.options$]{$polyglossia-otherlangs.name$} 95 | $endfor$ 96 | \fi 97 | $endif$ 98 | 
$if(natbib)$ 99 | \usepackage{natbib} 100 | \bibliographystyle{$if(biblio-style)$$biblio-style$$else$plainnat$endif$} 101 | $endif$ 102 | $if(biblatex)$ 103 | \usepackage[$if(biblio-style)$style=$biblio-style$,$endif$$for(biblatexoptions)$$biblatexoptions$$sep$,$endfor$]{biblatex} 104 | $for(bibliography)$ 105 | \addbibresource{$bibliography$} 106 | $endfor$ 107 | $endif$ 108 | $if(csl-refs)$ 109 | \newlength{\cslhangindent} 110 | \setlength{\cslhangindent}{1.5em} 111 | \newlength{\csllabelwidth} 112 | \setlength{\csllabelwidth}{3em} 113 | \newenvironment{CSLReferences}[3] % #1 hanging-ident, #2 entry sp 114 | {% don't indent paragraphs 115 | \setlength{\parindent}{0pt} 116 | % turn on hanging indent if param 1 is 1 117 | \ifodd #1 \everypar{\setlength{\hangindent}{\cslhangindent}}\ignorespaces\fi 118 | % set line spacing 119 | % set entry spacing 120 | \ifnum #2 > 0 121 | \setlength{\parskip}{#3\baselineskip} 122 | \fi 123 | }% 124 | {} 125 | \usepackage{calc} % for \widthof, \maxof 126 | \newcommand{\CSLBlock}[1]{#1\hfill\break} 127 | \newcommand{\CSLLeftMargin}[1]{\parbox[t]{\maxof{\widthof{#1}}{\csllabelwidth}}{#1}} 128 | \newcommand{\CSLRightInline}[1]{\parbox[t]{\linewidth}{#1}} 129 | \newcommand{\CSLIndent}[1]{\hspace{\cslhangindent}#1} 130 | $endif$ 131 | $if(listings)$ 132 | \usepackage{listings} 133 | $endif$ 134 | $if(lhs)$ 135 | \lstnewenvironment{code}{\lstset{language=Haskell,basicstyle=\small\ttfamily}}{} 136 | $endif$ 137 | $if(highlighting-macros)$ 138 | $highlighting-macros$ 139 | $endif$ 140 | $if(verbatim-in-note)$ 141 | \usepackage{fancyvrb} 142 | \VerbatimFootnotes % allows verbatim text in footnotes 143 | $endif$ 144 | $if(tables)$ 145 | \usepackage{longtable,booktabs} 146 | % Fix footnotes in tables (requires footnote package) 147 | \IfFileExists{footnote.sty}{\usepackage{footnote}\makesavenoteenv{long table}}{} 148 | $endif$ 149 | $if(graphics)$ 150 | \usepackage{graphicx,grffile} 151 | \makeatletter 152 | 
\def\maxwidth{\ifdim\Gin@nat@width>\linewidth\linewidth\else\Gin@nat@width\fi} 153 | \def\maxheight{\ifdim\Gin@nat@height>\textheight\textheight\else\Gin@nat@height\fi} 154 | \makeatother 155 | % Scale images if necessary, so that they will not overflow the page 156 | % margins by default, and it is still possible to overwrite the defaults 157 | % using explicit options in \includegraphics[width, height, ...]{} 158 | \setkeys{Gin}{width=\maxwidth,height=\maxheight,keepaspectratio} 159 | $endif$ 160 | $if(links-as-notes)$ 161 | % Make links footnotes instead of hotlinks: 162 | \renewcommand{\href}[2]{#2\footnote{\url{#1}}} 163 | $endif$ 164 | $if(strikeout)$ 165 | \usepackage[normalem]{ulem} 166 | % avoid problems with \sout in headers with hyperref: 167 | \pdfstringdefDisableCommands{\renewcommand{\sout}{}} 168 | $endif$ 169 | $if(indent)$ 170 | $else$ 171 | \IfFileExists{parskip.sty}{% 172 | \usepackage{parskip} 173 | }{% else 174 | \setlength{\parindent}{0pt} 175 | \setlength{\parskip}{6pt plus 2pt minus 1pt} 176 | } 177 | $endif$ 178 | \setlength{\emergencystretch}{3em} % prevent overfull lines 179 | \providecommand{\tightlist}{% 180 | \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} 181 | $if(numbersections)$ 182 | \setcounter{secnumdepth}{$if(secnumdepth)$$secnumdepth$$else$5$endif$} 183 | $else$ 184 | \setcounter{secnumdepth}{0} 185 | $endif$ 186 | $if(subparagraph)$ 187 | $else$ 188 | % Redefines (sub)paragraphs to behave more like sections 189 | \ifx\paragraph\undefined\else 190 | \let\oldparagraph\paragraph 191 | \renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}} 192 | \fi 193 | \ifx\subparagraph\undefined\else 194 | \let\oldsubparagraph\subparagraph 195 | \renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}} 196 | \fi 197 | $endif$ 198 | $if(dir)$ 199 | \ifxetex 200 | % load bidi as late as possible as it modifies e.g. 
graphicx 201 | $if(latex-dir-rtl)$ 202 | \usepackage[RTLdocument]{bidi} 203 | $else$ 204 | \usepackage{bidi} 205 | $endif$ 206 | \fi 207 | \ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex 208 | \TeXXeTstate=1 209 | \newcommand{\RL}[1]{\beginR #1\endR} 210 | \newcommand{\LR}[1]{\beginL #1\endL} 211 | \newenvironment{RTL}{\beginR}{\endR} 212 | \newenvironment{LTR}{\beginL}{\endL} 213 | \fi 214 | $endif$ 215 | 216 | % set default figure placement to htbp 217 | \makeatletter 218 | \def\fps@figure{htbp} 219 | \makeatother 220 | 221 | $for(header-includes)$ 222 | $header-includes$ 223 | $endfor$ 224 | 225 | $if(title)$ 226 | \title{$title$$if(thanks)$\thanks{$thanks$}$endif$} 227 | $endif$ 228 | $if(subtitle)$ 229 | \providecommand{\subtitle}[1]{} 230 | \subtitle{$subtitle$} 231 | $endif$ 232 | $if(author)$ 233 | \author{$for(author)$$author$$sep$ \and $endfor$} 234 | $endif$ 235 | $if(institute)$ 236 | \providecommand{\institute}[1]{} 237 | \institute{$for(institute)$$institute$$sep$ \and $endfor$} 238 | $endif$ 239 | \date{$date$} 240 | 241 | \begin{document} 242 | $if(title)$ 243 | \maketitle 244 | $endif$ 245 | $if(abstract)$ 246 | \begin{abstract} 247 | $abstract$ 248 | \end{abstract} 249 | $endif$ 250 | 251 | $for(include-before)$ 252 | $include-before$ 253 | 254 | $endfor$ 255 | $if(toc)$ 256 | { 257 | \setcounter{tocdepth}{$toc-depth$} 258 | \tableofcontents 259 | } 260 | $endif$ 261 | $if(lot)$ 262 | \listoftables 263 | $endif$ 264 | $if(lof)$ 265 | \listoffigures 266 | $endif$ 267 | $body$ 268 | 269 | $if(natbib)$ 270 | $if(bibliography)$ 271 | $if(biblio-title)$ 272 | $if(book-class)$ 273 | \renewcommand\bibname{$biblio-title$} 274 | $else$ 275 | \renewcommand\refname{$biblio-title$} 276 | $endif$ 277 | $endif$ 278 | \bibliography{$for(bibliography)$$bibliography$$sep$,$endfor$} 279 | 280 | $endif$ 281 | $endif$ 282 | $if(biblatex)$ 283 | \printbibliography$if(biblio-title)$[title=$biblio-title$]$endif$ 284 | 285 | $endif$ 286 | $for(include-after)$ 287 | 
$include-after$ 288 | 289 | $endfor$ 290 | \end{document} 291 | -------------------------------------------------------------------------------- /30-conclusion.Rmd: -------------------------------------------------------------------------------- 1 | # 总结 {-} 2 | \markboth{总结}{总结} 3 | 4 | 经过本课程的学习,大家对计算机体系结构有了一个具体的了解,但要问起什么是计算机体系结构,多半答不上来。本章内容是笔者撰写的《中国大百科全书》计算机体系结构词条初稿,力求完整、准确地对计算机体系结构进行描述,作为本书的总结。 5 | 6 | 7 | 8 | 计算机体系结构(Computer Architecture)是描述计算机各组成部分及其相互关系的一组规则和方法,是程序员所看到的计算机属性。计算机体系结构主要研究内容包括指令系统结构(Instruction Set Architecture,简称ISA)和计算机组织结构(Computer Organization)。微体系结构(Micro-architecture)是微处理器的组织结构,并行体系结构是并行计算机的组织结构。冯诺依曼结构的存储程序和指令驱动执行原理是现代计算机体系结构的基础。 9 | 10 | 计算机体系结构可以有不同层次和形式的表现方式。计算机体系结构通常用指令系统手册和结构框图来表示,结构框图中的方块表示计算机的功能模块,线条和箭头表示指令和数据在功能模块中的流动,结构框图可以不断分解一直到门级或晶体管级。计算机体系结构也可以用高级语言如C语言来表示,形成结构模拟器,用于性能评估和分析。用硬件描述语言(如Verilog)描述的体系结构可以通过电子设计自动化(Electronic Design Automation,简称EDA)工具进行功能验证和性能分析,转换成门级及晶体管级网表,并通过布局布线最终转换成版图,用于芯片制造。 11 | 12 | 1、冯诺依曼结构及其基本原理 13 | 14 | 1945年匈牙利籍数学家冯诺依曼结合EDVAC计算机的研制提出了世界上第一个完整的计算机体系结构,被称为冯诺依曼结构。冯诺依曼结构的主要特点是:①计算机由存储器、运算器、控制器、输入设备、输出设备五部分组成,其中运算器和控制器合称为中央处理器(Central Processing Unit,简称CPU)或处理器。②存储器是按地址访问的线性编址的一维结构,每个单元的位数固定。指令和数据不加区别混合存储在同一个存储器中。③控制器从存储器中取出指令并根据指令要求发出控制信号控制计算机的操作。控制器中的程序计数器指明要执行的指令所在的存储单元地址。程序计数器一般按顺序递增,但可按指令要求而改变。④以运算器为中心,输入输出(Input/Output,简称IO)设备与存储器之间的数据传送都经过运算器。 15 | 16 | 随着技术的进步,冯诺依曼结构得到了持续改进,主要包括:①以运算器为中心改进为以存储器为中心,数据流向更加合理,从而使运算器、存储器和IO设备能够并行工作。②由单一的集中控制改进为分散控制。早期的计算机工作速度低,运算器、存储器、控制器和IO设备可以在同一个时钟信号的控制下同步工作。现在运算器、存储器与IO设备的速度差异很大,需要异步分散控制。③从基于串行算法改进为适应并行算法,出现了流水线处理器、超标量处理器、向量处理器、多核处理器、对称多处理机(Symmetric Multiprocessor,简称SMP)、大规模并行处理机(Massively Parallel Processing,简称MPP)和机群系统等。④出现了为适应特殊需要的专用计算机,如图形处理器(Graphics Processing Unit,简称GPU)、数字信号处理器(Digital Signal Processor,简称DSP)等。 17 | 18 | 虽然经过了长期的发展,以存储程序和指令驱动执行为主要特点的冯诺依曼结构仍是现代计算机的主流结构。非冯诺依曼计算机的研究成果包括依靠数据驱动的数据流计算机、图归约计算机等。 19 | 20 | 21 | 22 | 2、指令系统结构 23 | 24 | 计算机系统为软件编程提供不同层次的功能和逻辑抽象,主要包括应用程序编程接口(Application Programming Interface,简称API)、应用程序二进制接口(Application Binary 
Interface,简称ABI)以及ISA三个层次。 25 | 26 | API是应用程序的高级语言编程接口,在编写程序的源代码时使用。常见的API包括C语言、Fortran语言、Java语言、Javascript语言、OpenGL图形编程接口等。使用一种API编写的应用程序经重新编译后可以在支持该API的不同计算机上运行。 27 | 28 | ABI是应用程序访问计算机硬件及操作系统服务的接口,由计算机的用户态指令和操作系统的系统调用组成。为了实现多进程访问共享资源的安全性,处理器设有“用户态”与“核心态”。用户程序在用户态下执行,操作系统向用户程序提供具有预定功能的系统调用函数来访问只有核心态才能访问的硬件资源。当用户程序调用系统调用函数时,处理器进入核心态执行诸如访问IO设备、修改处理器状态等只有核心态才能执行的指令。处理完系统调用后,处理器返回用户态执行用户代码。相同的应用程序二进制代码可以在相同ABI的不同计算机上运行。 29 | 30 | ISA是计算机硬件的语言系统,也叫机器语言,是计算机软件和硬件的界面,反映了计算机所拥有的基本功能。计算机硬件设计人员采用各种手段实现指令系统,软件设计人员使用指令系统编制各种软件,用这些软件来填补指令系统与人们习惯的计算机使用方式之间的语义差距。设计指令系统就是要选择应用程序和操作系统中一些基本操作应由硬件实现还是由软件通过一串指令实现,然后具体确定指令系统的指令格式、类型、操作以及对操作数的访问方式。相同的应用程序及操作系统二进制代码可以在相同ISA的不同计算机上运行。 31 | 32 | ISA通常由指令集合、处理器状态和例外三部分组成。 33 | 34 | 指令包含操作编码和操作数编码,操作编码指明操作类型,操作数编码指明操作对象。常见的指令编码方式包括复杂指令系统(Complex Instruction Set Computer,简称CISC),精简指令系统(Reduced Instruction Set Computer,简称RISC)和超长指令字(Very Long Instruction Word,简称VLIW)等。 35 | 36 | 指令的操作主要包括:运算指令,如加减乘除、逻辑运算、移位等;数据传送指令,如取数和存数;程序控制指令,如条件和非条件转移、函数调用和返回等;处理器状态控制指令,如系统调用指令、调试指令、同步指令等。 37 | 38 | 指令的操作数包括立即数、寄存器、存储器、IO设备寄存器等。立即数是指令中直接给出的数据。寄存器用于保存处理器最常用的数据,包括通用寄存器、浮点寄存器、控制寄存器等,处理器访问寄存器时直接在指令中指明要访问的寄存器号。存储器是计算机中保存指令和数据的场所,计算机取指令和存取数据都要先计算指令和数据所处的存储单元地址并根据地址来读写存储器。IO设备都有专门的设备控制器,设备控制器向处理器提供一组IO设备寄存器,处理器通过读写IO设备寄存器来获知IO设备状态并控制IO设备,处理器写入IO设备寄存器的数据,会被设备控制器解释成控制IO设备的命令。 39 | 40 | 指令需要明确操作数的数据表示、编址方式、寻址方式和定位方式等。数据表示给出指令系统可直接调用的数据类型,包括整数、实数、布尔值、字符等。编址方式给出编址单位、编址方法和地址空间等;编址单位有字编址、字节编址和位编址,普遍使用的是字节编址;常见的编址方法有大尾端(Big Endian)和小尾端(Little Endian)两种;地址空间包括寄存器空间、存储器空间和IO设备空间,有些ISA把存储器和IO设备统一编址,有些ISA把寄存器、存储器和IO设备统一编址。主要寻址方式有:立即数寻址、寄存器寻址、直接寻址、间接寻址、变址寻址(包括相对寻址和基址寻址)和堆栈寻址等。定位方式确定指令和数据的物理地址;直接定位方式在程序装入主存储器之前确定指令和数据的物理地址;静态定位方式在程序装入主存储器的过程中进行地址变换,确定指令和数据的物理地址;动态定位方式在程序执行过程中,当访问到相应的指令或数据时才进行地址变换,确定指令和数据的物理地址;现代计算机多采用动态定位方式。 41 | 42 | 通用计算机至少要有两种工作状态:核心态和用户态。两个状态下所能使用的指令和存储空间等硬件资源有差别。一般来说,只有操作系统才能工作在核心态,用户程序只能工作在用户态并可以通过例外和系统调用进入核心态。有些处理器有更多工作状态,如核心态(Kernel)、监督态(Hypervisor)、管理态(Supervisor)、用户态(User)等。 43 | 44 | 
例外(Exception)系统是现代计算机的重要组成部分,除了管理外部设备之外,还承担了包括故障处理、实时处理、分时操作系统、程序的跟踪调试、程序的监测、用户程序与操作系统的联系等任务。发生例外时,处理器需要保存包括例外原因、例外指令的程序计数器内容等信息,把处理器状态切换为核心态并跳转到事先指定的操作系统例外处理入口地址;执行完例外处理程序后,处理器状态切换回发生例外前的状态并跳转回发生例外的指令继续执行。指令系统要指明例外源的分类组织、例外系统的软硬件功能分配、例外现场的保存和恢复、例外优先级、例外响应方式和屏蔽方式等。 45 | 46 | 47 | 48 | 3、计算机组织结构 49 | 50 | 计算机组织结构指计算机的组成部分及各部分之间的互连实现。典型计算机的基本组成包括CPU、存储器、IO设备,其中CPU包括运算器和控制器,IO设备包括输入设备和输出设备。计算机从输入设备接收程序和数据,存放在存储器中;CPU运行程序处理数据;最后将结果数据通过输出设备输出。 51 | 52 | 运算器包括算术和逻辑运算部件、移位部件、寄存器等。复杂运算如乘除法、开方及浮点运算可用程序实现或由运算器实现。寄存器既可用于保存数据,也可用于保存地址。运算器还可设置条件码寄存器等专用寄存器,条件码寄存器保存当前运算结果的状态,如运算结果是正数、负数或零,是否溢出等。 53 | 54 | 控制器控制指令流和每条指令的执行,内含程序计数器和指令寄存器等。程序计数器存放当前执行指令的地址,指令寄存器存放当前正在执行的指令。指令通过译码产生控制信号,用于控制运算器、存储器、IO设备的工作。这些控制信号可以用硬连线逻辑产生,也可以用微程序产生,也可以两者结合产生。为了获得高指令吞吐率,可以采用指令重叠执行的流水线技术,以及同时执行多条指令的超标量技术。当遇到执行时间较长或条件不具备的指令时,把条件具备的后续指令提前执行(称为乱序执行)可以提高流水线效率。控制器还产生一定频率的时钟脉冲,用于计算机各组成部分的同步。 55 | 56 | 存储器存储程序和数据,又称主存储器或内存,一般用动态随机存储器(Dynamic Random Access Memory,简称DRAM)实现。CPU可以直接访问它,IO设备也频繁地和它交换数据。存储器的存取速度往往满足不了CPU的快速要求,容量也满足不了应用的需要,为此将存储系统分为高速缓存(Cache)、主存储器和辅助存储器三个层次。Cache存放当前CPU最频繁访问的部分主存储器内容,可以采用比DRAM速度快但容量小的静态随机存储器(Static Random Access Memory,简称SRAM)实现。数据和指令在Cache和主存储器之间的调动由硬件自动完成。为扩大存储器容量,使用磁盘、磁带、光盘等能存储大量数据的存储器作为辅助存储器。计算机运行时所需的应用程序、系统软件和数据等都先存放在辅助存储器中,在运行过程中分批调入主存储器。数据和指令在主存储器和辅助存储器之间的调动由操作系统完成。CPU访问存储器时,面对的是一个高速(接近于Cache的速度)、大容量(接近于辅助存储器的容量)的存储器。现代计算机中还有少量只读存储器(Read Only Memory,简称ROM)用来存放引导程序和基本输入输出系统(Basic Input Output System,简称BIOS)等。现代计算机访问内存时采用虚拟地址,操作系统负责维护虚拟地址和物理地址转换的页表,集成在CPU中的存储管理部件(Memory Management Unit,简称MMU)负责把虚拟地址转换为物理地址。 57 | 58 | IO设备实现计算机和外部世界的信息交换。传统的IO设备有键盘、鼠标、打印机和显示器等;新型的IO设备能进行语音、图像、影视的输入输出和手写体文字输入,并支持计算机之间通过网络进行通信;磁盘等辅助存储器在计算机中也当作IO设备来管理。处理器通过读写IO设备控制器中的寄存器来访问及控制IO设备。高速IO设备可以在处理器安排下直接与主存储器成批交换数据,称为直接存储器访问(Direct Memory Access,简称DMA)。处理器可以通过查询设备控制器状态与IO设备进行同步,也可以通过中断与IO设备进行同步。 59 | 60 | 由若干个CPU、存储器和IO设备可以构成比单机性能更高的并行处理系统。 61 | 62 | 现代计算机各部件之间采用总线互连。为了便于不同厂家生产的设备能在一起工作以及设备的扩充,总线的标准化非常重要。常见的总线包括片上总线如AXI总线,系统总线如QPI和HT总线,内存总线如SDRAM总线,IO总线如PCIE、SATA、USB总线等。 63 | 64 | 65 | 66 | 4、微体系结构 67 
| 68 | 半导体工艺的发展允许在单个芯片内部集成CPU,称为微处理器(Microprocessor)。微体系结构(简称微结构)是微处理器的组织结构,描述处理器的组成部分及其互连关系,以及这些组成部分及其互连如何实现指令系统的功能。对于同一个指令系统,复杂的微结构性能高,功耗和成本也高;简单的微结构性能低,功耗和成本也低。随着半导体工艺的不断发展,实现相同指令系统的处理器微结构不断升级并不断提高性能。 69 | 70 | 计算机执行指令一般包含以下过程:从存储器取指令并对取回的指令进行译码,从存储器或寄存器读取指令执行需要的操作数,执行指令,把执行结果写回存储器或寄存器。上述过程称为一个指令周期。计算机不断重复指令周期直到完成程序的执行。体系结构研究的一个永恒主题就是不断加速上述指令执行周期,从而提高计算机运行程序的效率。人们提出了很多提高指令执行效率的技术,包括RISC技术、指令流水线技术、高速缓存技术、转移预测技术、乱序执行技术、超标量(又称为多发射)技术等。 71 | 72 | RISC技术。自从1940年代发明电子计算机以来,处理器结构和指令系统经历了一个由简单到复杂,由复杂到简单,又由简单到复杂的否定之否定过程。早期的处理器结构及其指令系统由于工艺技术的限制,不可能做得很复杂。随着工艺技术的发展,1960年代后流水线技术、动态调度技术、向量机技术被广泛使用,处理器结构和指令系统变得复杂。1980年代提出的RISC技术通过减少指令数目、定长编码、降低编码密度等以简化指令的取指、译码、执行的逻辑以提高频率,通过增加寄存器数目及load-store结构以提高效率。后来随着深度流水、超标量、乱序执行的实现,RISC结构变得越来越复杂。 73 | 74 | RISC指令采用load-store结构,运算指令从寄存器读取操作数并把结果写回寄存器,访存指令则负责在寄存器和存储器间交换数据,运算指令和访存指令分别在不同的功能部件执行。在load-store结构中,运算器只需比较指令的寄存器号来判断指令间的数据相关,访存部件只需比较访存指令的地址来判断指令间的数据相关,从而支持高效的流水线、多发射及乱序执行技术。X86系列从Pentium III开始,把CISC指令翻译成若干RISC微操作以提高指令流水线效率,如Haswell微结构最多允许192个内部微操作乱序执行。 75 | 76 | 指令流水线技术。指令流水线把一条指令的执行划分为若干阶段(如分为取指、译码、执行、访存、写回阶段)来减少每个时钟周期的工作量,从而提高主频;并允许多条指令的不同阶段重叠执行实现并行处理(如一条指令处于执行阶段时,另一条指令处于译码阶段)。虽然同一条指令的执行时间没有变短,但处理器在单位时间内执行的指令数增加了。 77 | 78 | 指令流水线的执行单元包括算术和逻辑运算部件(Arithmetic Logic Units,简称ALU)、浮点运算部件(Floating Point Units,简称FPU)、向量运算部件、访存部件、转移部件等。这些部件在流水线的调度下具体执行指令规定的操作。运算部件的个数和延迟,访存部件的存储层次、容量和带宽,以及转移部件的转移猜测算法是决定微结构性能的重要因素。 79 | 80 | Cache技术。随着工艺技术的发展,处理器的运算速度和内存容量按摩尔定律的预测指数增加,但内存速度提高非常缓慢,与处理器速度的提高形成了“剪刀差”。 81 | 82 | 工艺技术的上述特点使得访存延迟成为以存储器为中心的冯诺依曼结构的主要瓶颈。Cache技术利用程序访问内存的时间局部性(一个单元如果当前被访问,则近期很有可能被再次访问)和空间局部性(一个单元被访问后,与之相邻的单元也很有可能被访问),使用速度较快、容量较小的Cache临时保存处理器常用的数据,使得处理器的多数访存操作可以在Cache上快速进行,只有少量访问Cache不命中的访存操作才访问内存。Cache是内存的映像,其内容是内存内容的子集,处理器访问Cache和访问内存使用相同的地址。从1980年代开始,RISC处理器就开始在处理器芯片内集成KB级的小容量Cache。现代处理器则普遍在片内集成多级Cache,典型的多核处理器每个处理器核一级指令和数据Cache各几十KB,二级Cache为几百KB,而多核共享的三级Cache为几MB到几十MB。 83 | 84 | Cache技术和指令流水线技术相得益彰。访问处理器片外内存的长延迟使流水线很难发挥作用,使用片内Cache可以有效降低流水线的访存时间,提高流水线效率。Cache容量越大,则流水线效率越高,处理器性能越高。 85 | 86 | 
转移预测技术。冯诺依曼结构指令驱动执行的特点使转移指令成为提高流水线效率的瓶颈。典型应用程序平均每5-10条指令中就有一条转移指令,而转移指令的后续指令需要等待转移指令执行结果确定后才能取指,导致转移指令和后续指令之间不能重叠执行,降低了流水线效率。随着主频的提高,现代处理器流水线普遍在10-20级之间,由于转移指令引起的流水线阻塞成为提高指令流水线效率的重要瓶颈。 87 | 88 | 转移预测技术可以消除转移指令引起的指令流水线阻塞。转移预测器根据当前转移指令或其它转移指令的历史行为,在转移指令的取指或译码阶段预测该转移指令的跳转方向和目标地址并进行后续指令的取指。转移指令执行后,根据已经确定的跳转方向和目标地址对预测结果进行修正。如果发生转移预测错误,还需要取消指令流水线中的后续指令。为了提高预测精度并降低预测错误时的流水线开销,现代高性能处理器采用了复杂的转移预测器。 89 | 90 | 乱序执行技术。如果指令i是条长延迟指令,如除法指令或Cache不命中的访存指令,那么在顺序指令流水线中指令i后面的指令需要在流水线中等待很长时间。乱序执行技术通过指令动态调度允许指令i后面的源操作数准备好的指令越过指令i执行(需要使用指令i的运算结果的指令由于源操作数没有准备好,不会越过指令i执行),以提高指令流水线效率。为此,在指令译码之后的读寄存器阶段,判断指令需要的操作数是否准备好。如果操作数已经准备好,就进入执行阶段;如果操作数没有准备好,就进入称为保留站或者发射队列的队列中等待,直到操作数准备好后再进入执行阶段。为了保证执行结果符合程序规定的要求,乱序执行的指令需要有序结束。为此,执行完的指令均进入一个称为重排序缓存(Reorder Buffer,简称ROB)的队列,并把执行结果临时写入重命名寄存器。ROB根据指令进入流水线的次序有序提交指令的执行结果到目标寄存器或存储器。CDC6600和IBM 360/91分别使用计分板和保留站最早实现了指令的动态调度。 91 | 92 | 重命名寄存器与指令访问的结构寄存器相对应。为了避免多条指令访问同一个结构寄存器而使该寄存器成为串行化瓶颈,指令流水线可以把对该结构寄存器的访问定向到重命名寄存器。乱序执行流水线把指令执行结果写入重命名寄存器而不是结构寄存器,以避免破坏结构寄存器的内容,到顺序提交阶段再把重命名寄存器内容写入结构寄存器。两组执行不同运算但使用同一结构寄存器的指令可以使用不同的重命名寄存器,从而实现并行执行。 93 | 94 | 超标量。工艺技术的发展使得在1980年代后期出现了超标量处理器。超标量结构允许指令流水线的每一阶段同时处理多条指令。例如Alpha 21264处理器每拍可以取四条指令,发射六条指令,写回六条指令,提交11条指令。如果把单发射结构比作单车道马路,多发射结构就是多车道马路。 95 | 96 | 由于超标量结构的指令和数据通路都变宽了,使得寄存器端口、保留站端口、ROB端口、功能部件数都需要增加,例如Alpha 21264的寄存器堆有8个读端口和6个写端口,数据Cache的RAM通过倍频支持一拍两次访问。现代超标量处理器一般包含两个以上访存部件,两个以上定点运算部件以及两个以上浮点运算部件。超标量结构在指令译码或寄存器重命名时不仅要判断前后拍指令的数据相关,还需要判断同一拍中多条指令间的数据相关。 97 | 98 | 99 | 100 | 5、并行体系结构 101 | 102 | 并行体系结构是并行计算机系统的组织结构,通过把任务划分为多个进程或线程,让不同的处理器并行运行不同的进程或线程来提高性能。此外,随着处理器访存延迟的增加,Cache失效导致流水线长时间堵塞,处理器可以在一个线程等待长时间访存时快速切换到另一个线程执行以提高流水线效率。 103 | 104 | 多进程并行存在于多个操作系统之间或一个操作系统之内。用于高性能计算的MPI并行程序以及机群数据库是存在于多个操作系统之间的多进程并行的典型应用;由操作系统调度的多道程序则是操作系统之内多进程并行的典型应用。多线程并行只存在于一个操作系统之内。线程的粒度比进程小,线程的上下文也比进程简单。传统的多线程切换由操作系统调度并保留上下文,现代处理器通过硬件实现多份线程上下文来支持单周期的多线程切换。同时多线程(Simultaneous Multi-Threading,简称SMT)技术甚至允许超标量指令流水线的同一流水级同时运行来自不同线程的指令。现代处理器还通过硬件实现多份操作系统上下文来支持多个操作系统的快速切换,以提高云计算虚拟机的效率。 105 | 106 | 
并行处理结构普遍存在于传统的大型机、服务器和高端工作站中。包含2-8个CPU芯片的小规模并行服务器和工作站一直是事务处理市场的主流产品。包含16-1024个CPU芯片的大型计算机在大型企业的信息系统中比较普遍。用于科学和工程计算的高性能计算机则往往包含上万个CPU芯片。随着集成电路集成度的不断提高,把多个CPU集成在单个芯片内部的多核CPU逐渐成为主流的CPU芯片产品。多核CPU芯片最早出现在嵌入式领域,把多个比较简单的CPU集成在一个芯片上。2005年个人计算机CPU芯片开始集成两个CPU核。现在的市场主流个人计算机CPU芯片一般集成2-4个CPU核,服务器CPU芯片则集成8-32个CPU核,专用处理器如GPU则集成几百到上千个处理核心。 107 | 108 | 并行处理系统通过互连网络把多个处理器连接成一个整体。常见的互连网络包括总线、交叉开关、环状网络、树形网络、二维或更多维网格等。并行系统的多个处理器之间需要同步机制来协同多处理器工作。常见的同步机制包括锁(Lock)、栅栏(Barrier)以及事务内存(Transactional Memory)等,计算机指令系统通常要设置专用的同步指令。 109 | 110 | 在共享存储的并行处理系统中,同一个内存单元(一般以Cache 行为单位)在不同的处理器中有多个备份,需要通过存储一致性模型(Memory Consistency Model)规定多个处理器访问共享内存的一致性标准。典型的存储一致性模型包括顺序一致性(Sequential Consistency),处理器一致性(Processor Consistency),弱一致性(Weak Consistency),释放一致性(Release Consistency)等。高速缓存一致性协议(Cache Coherence Protocol)把一个处理器新写的值传播给其它处理器,以达到存储一致性的目的。在侦听协议(Snoopy Protocol)中,写共享单元的处理器把写信息通过广播告知其它处理器;在基于目录的协议(Directory-based Protocol)中,每个存储单元对应一个目录项记录拥有该存储单元的副本的那些处理器号,写共享单元的处理器根据目录项的记录把写信息告知其它处理器。 111 | 112 | 113 | 114 | 6、体系结构的设计目标和方法 115 | 116 | 体系结构设计的主要指标包括性能、价格和功耗,其它指标包括安全性、可靠性、使用寿命等。体系结构设计的主要目标经历了大型机时代一味追求性能(Performance per Second),到个人计算机时代追求性能价格比(Performance per Dollar),再到移动互联时代追求性能功耗比(Performance per Watt)的转变。性能是计算机体系结构的首要设计目标。 117 | 118 | 性能的最本质定义是“完成一个或多个任务所需要的时间”。完成一个任务所需要的时间由完成该任务需要的指令数、完成每条指令需要的拍数以及每拍需要的时间三个量相乘得到。完成任务需要的指令数与算法、编译器和指令的功能有关;每条指令执行拍数(Cycles Per Instruction,简称CPI)或每拍执行指令数(Instructions Per Cycle,简称IPC)与编译、指令功能、微结构设计相关;每拍需要的时间,也就是时钟周期,与微结构、电路设计、工艺等因素有关。 119 | 120 | 为了满足应用需求并不断提高性能,计算机体系结构在发展过程中遵循一些基本原则和方法,包括平衡性、局部性、并行性和虚拟化。 121 | 122 | 结构设计的第一个方法就是平衡设计。计算机是个复杂系统,影响性能的因素很多。结构设计要统筹兼顾,使各种影响性能的因素达到均衡。通用CPU设计有一个关于计算性能和访存带宽平衡的经验法则,即峰值浮点运算速度(MFLOPS)和峰值访存带宽(MB/s)为1:1左右。计算机体系结构中有一个著名的阿姆达尔(Amdahl)定律。该定律指出通过使用某种较快的执行方式所获得的性能的提高,受限于不可使用这种方式提高性能的执行时间所占总执行时间的百分比,例如一个程序的并行加速比,最终受限于不能被并行化的串行部分。 123 | 124 | 结构设计的第二个方法是利用局部性。当结构设计基本平衡以后,性能优化要抓主要矛盾,重点改进最频繁发生事件的执行效率。结构设计经常利用局部性加快经常性事件的速度。RISC指令系统利用指令的事件局部性对频繁发生的指令进行重点优化。硬件转移预测利用转移指令跳转方向的局部性,即同一条转移指令在执行时经常往同一个方向跳转。Cache和预取利用访存的时间和空间局部性优化性能。 125 | 126 | 
结构设计的第三个方法是开发并行性。计算机中可以开发三种层次的并行性。第一个层次的并行性是指令级并行,包括时间并行即指令流水线,以及空间并行即超标量技术。1980年代RISC出现后,指令级并行开发达到了一个顶峰,2010年后进一步挖掘指令级并行的空间已经不大。第二个层次的并行性是数据级并行,主要指单指令流多数据流(Single Instruction Multiple Data,简称SIMD)的向量结构。二十世纪七、八十年代以Cray为代表的向量机十分流行;现代通用CPU普遍支持短向量运算,如X86的AVX指令支持256位短向量运算。第三个层次的并行性是任务级并行,包括进程级和线程级并行。上述三种并行性在现代计算机中都存在,多核CPU运行线程级或进程级并行的程序,每个核采用超标量流水线结构,并支持SIMD向量指令。 127 | 128 | 结构设计的第四个方法是虚拟化。所谓虚拟化,就是“用起来是这样的,实际上是那样的”,或者“逻辑上是这样的,物理上是那样的”。结构设计者宁愿自己多费事,也要尽量为用户提供一个友好的使用界面。如虚拟存储为每个进程提供独立的存储空间,虚实地址转换和物理内存分配都由CPU和操作系统自动完成,大大解放了程序员的生产力。多线程和虚拟机技术通过硬件支持多个线程上下文或操作系统上下文的快速切换,在一个CPU上“同时”运行多个线程或操作系统,把单个CPU虚拟成多个CPU。此外,流水线和多发射技术在维持串行编程模型的情况下提高了速度;Cache技术使程序员看到一个像Cache那么快,像内存那么大的存储空间;Cache一致性协议在分布式存储的情况下给程序员提供一个统一的存储空间;这些都是虚拟化方法的体现。 129 | 130 | \newpage 131 | -------------------------------------------------------------------------------- /renv.lock: -------------------------------------------------------------------------------- 1 | { 2 | "R": { 3 | "Version": "3.6.3", 4 | "Repositories": [ 5 | { 6 | "Name": "CRAN", 7 | "URL": "https://cloud.r-project.org" 8 | } 9 | ] 10 | }, 11 | "Packages": { 12 | "R6": { 13 | "Package": "R6", 14 | "Version": "2.5.1", 15 | "Source": "Repository", 16 | "Repository": "CRAN", 17 | "Hash": "470851b6d5d0ac559e9d01bb352b4021" 18 | }, 19 | "Rcpp": { 20 | "Package": "Rcpp", 21 | "Version": "1.0.7", 22 | "Source": "Repository", 23 | "Repository": "CRAN", 24 | "Hash": "dab19adae4440ae55aa8a9d238b246bb" 25 | }, 26 | "base64enc": { 27 | "Package": "base64enc", 28 | "Version": "0.1-3", 29 | "Source": "Repository", 30 | "Repository": "CRAN", 31 | "Hash": "543776ae6848fde2f48ff3816d0628bc" 32 | }, 33 | "bit": { 34 | "Package": "bit", 35 | "Version": "4.0.4", 36 | "Source": "Repository", 37 | "Repository": "CRAN", 38 | "Hash": "f36715f14d94678eea9933af927bc15d" 39 | }, 40 | "bit64": { 41 | "Package": "bit64", 42 | "Version": "4.0.5", 43 | "Source": "Repository", 44 | "Repository": "CRAN", 45 | "Hash": "9fe98599ca456d6552421db0d6772d8f" 46 | }, 47 | "bookdown": { 48 | 
"Package": "bookdown", 49 | "Version": "0.24", 50 | "Source": "Repository", 51 | "Repository": "CRAN", 52 | "Hash": "3837766a1e1b527af25fa3e2d12a2800" 53 | }, 54 | "cli": { 55 | "Package": "cli", 56 | "Version": "3.1.0", 57 | "Source": "Repository", 58 | "Repository": "CRAN", 59 | "Hash": "66a3834e54593c89d8beefb312347e58" 60 | }, 61 | "clipr": { 62 | "Package": "clipr", 63 | "Version": "0.7.1", 64 | "Source": "Repository", 65 | "Repository": "CRAN", 66 | "Hash": "ebaa97ac99cc2daf04e77eecc7b781d7" 67 | }, 68 | "cpp11": { 69 | "Package": "cpp11", 70 | "Version": "0.4.0", 71 | "Source": "Repository", 72 | "Repository": "CRAN", 73 | "Hash": "40ba3fd26c8f61d8d14d334bc7761df9" 74 | }, 75 | "crayon": { 76 | "Package": "crayon", 77 | "Version": "1.4.1", 78 | "Source": "Repository", 79 | "Repository": "CRAN", 80 | "Hash": "e75525c55c70e5f4f78c9960a4b402e9" 81 | }, 82 | "data.table": { 83 | "Package": "data.table", 84 | "Version": "1.14.2", 85 | "Source": "Repository", 86 | "Repository": "CRAN", 87 | "Hash": "36b67b5adf57b292923f5659f5f0c853" 88 | }, 89 | "digest": { 90 | "Package": "digest", 91 | "Version": "0.6.28", 92 | "Source": "Repository", 93 | "Repository": "CRAN", 94 | "Hash": "49b5c6e230bfec487b8917d5a0c77cca" 95 | }, 96 | "dplyr": { 97 | "Package": "dplyr", 98 | "Version": "1.0.7", 99 | "Source": "Repository", 100 | "Repository": "CRAN", 101 | "Hash": "36f1ae62f026c8ba9f9b5c9a08c03297" 102 | }, 103 | "ellipsis": { 104 | "Package": "ellipsis", 105 | "Version": "0.3.2", 106 | "Source": "Repository", 107 | "Repository": "CRAN", 108 | "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077" 109 | }, 110 | "evaluate": { 111 | "Package": "evaluate", 112 | "Version": "0.14", 113 | "Source": "Repository", 114 | "Repository": "CRAN", 115 | "Hash": "ec8ca05cffcc70569eaaad8469d2a3a7" 116 | }, 117 | "fansi": { 118 | "Package": "fansi", 119 | "Version": "0.5.0", 120 | "Source": "Repository", 121 | "Repository": "CRAN", 122 | "Hash": "d447b40982c576a72b779f0a3b3da227" 123 | }, 124 | 
"fastmap": { 125 | "Package": "fastmap", 126 | "Version": "1.1.0", 127 | "Source": "Repository", 128 | "Repository": "CRAN", 129 | "Hash": "77bd60a6157420d4ffa93b27cf6a58b8" 130 | }, 131 | "flextable": { 132 | "Package": "flextable", 133 | "Version": "0.6.10.001", 134 | "Source": "GitHub", 135 | "RemoteType": "github", 136 | "RemoteHost": "api.github.com", 137 | "RemoteUsername": "foxsen", 138 | "RemoteRepo": "flextable", 139 | "RemoteRef": "master", 140 | "RemoteSha": "30cb59a02e2de1c8e5fd795e77259c860c384d0d", 141 | "Hash": "8cd4dacc5c76330ec55a3a6ccf95a064" 142 | }, 143 | "ftExtra": { 144 | "Package": "ftExtra", 145 | "Version": "0.2.0", 146 | "Source": "Repository", 147 | "Repository": "CRAN", 148 | "Hash": "fe602b946aa77e12f5d9be91327f903c" 149 | }, 150 | "gdtools": { 151 | "Package": "gdtools", 152 | "Version": "0.2.3.9000", 153 | "Source": "GitHub", 154 | "RemoteType": "github", 155 | "RemoteHost": "api.github.com", 156 | "RemoteUsername": "foxsen", 157 | "RemoteRepo": "gdtools", 158 | "RemoteRef": "main", 159 | "RemoteSha": "b2caddd09672eceac0a6f1952f1d41c504974aa4", 160 | "Hash": "fa2826a504b6f80dca326df8f79bf5da" 161 | }, 162 | "generics": { 163 | "Package": "generics", 164 | "Version": "0.1.1", 165 | "Source": "Repository", 166 | "Repository": "CRAN", 167 | "Hash": "3f6bcfb0ee5d671d9fd1893d2faa79cb" 168 | }, 169 | "glue": { 170 | "Package": "glue", 171 | "Version": "1.4.2", 172 | "Source": "Repository", 173 | "Repository": "CRAN", 174 | "Hash": "6efd734b14c6471cfe443345f3e35e29" 175 | }, 176 | "highr": { 177 | "Package": "highr", 178 | "Version": "0.9", 179 | "Source": "Repository", 180 | "Repository": "CRAN", 181 | "Hash": "8eb36c8125038e648e5d111c0d7b2ed4" 182 | }, 183 | "hms": { 184 | "Package": "hms", 185 | "Version": "1.1.1", 186 | "Source": "Repository", 187 | "Repository": "CRAN", 188 | "Hash": "5b8a2dd0fdbe2ab4f6081e6c7be6dfca" 189 | }, 190 | "htmltools": { 191 | "Package": "htmltools", 192 | "Version": "0.5.2", 193 | "Source": "Repository", 194 
| "Repository": "CRAN", 195 | "Hash": "526c484233f42522278ab06fb185cb26" 196 | }, 197 | "jquerylib": { 198 | "Package": "jquerylib", 199 | "Version": "0.1.4", 200 | "Source": "Repository", 201 | "Repository": "CRAN", 202 | "Hash": "5aab57a3bd297eee1c1d862735972182" 203 | }, 204 | "jsonlite": { 205 | "Package": "jsonlite", 206 | "Version": "1.7.2", 207 | "Source": "Repository", 208 | "Repository": "CRAN", 209 | "Hash": "98138e0994d41508c7a6b84a0600cfcb" 210 | }, 211 | "knitr": { 212 | "Package": "knitr", 213 | "Version": "1.36", 214 | "Source": "Repository", 215 | "Repository": "CRAN", 216 | "Hash": "46344b93f8854714cdf476433a59ed10" 217 | }, 218 | "lifecycle": { 219 | "Package": "lifecycle", 220 | "Version": "1.0.1", 221 | "Source": "Repository", 222 | "Repository": "CRAN", 223 | "Hash": "a6b6d352e3ed897373ab19d8395c98d0" 224 | }, 225 | "magrittr": { 226 | "Package": "magrittr", 227 | "Version": "2.0.1", 228 | "Source": "Repository", 229 | "Repository": "CRAN", 230 | "Hash": "41287f1ac7d28a92f0a286ed507928d3" 231 | }, 232 | "officer": { 233 | "Package": "officer", 234 | "Version": "0.4.0", 235 | "Source": "Repository", 236 | "Repository": "CRAN", 237 | "Hash": "a49b891db0ebf34b40e6b1fb1ec9d3ad" 238 | }, 239 | "pillar": { 240 | "Package": "pillar", 241 | "Version": "1.6.4", 242 | "Source": "Repository", 243 | "Repository": "CRAN", 244 | "Hash": "60200b6aa32314ac457d3efbb5ccbd98" 245 | }, 246 | "pkgconfig": { 247 | "Package": "pkgconfig", 248 | "Version": "2.0.3", 249 | "Source": "Repository", 250 | "Repository": "CRAN", 251 | "Hash": "01f28d4278f15c76cddbea05899c5d6f" 252 | }, 253 | "prettyunits": { 254 | "Package": "prettyunits", 255 | "Version": "1.1.1", 256 | "Source": "Repository", 257 | "Repository": "CRAN", 258 | "Hash": "95ef9167b75dde9d2ccc3c7528393e7e" 259 | }, 260 | "progress": { 261 | "Package": "progress", 262 | "Version": "1.2.2", 263 | "Source": "Repository", 264 | "Repository": "CRAN", 265 | "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061" 266 | }, 267 | 
"purrr": { 268 | "Package": "purrr", 269 | "Version": "0.3.4", 270 | "Source": "Repository", 271 | "Repository": "CRAN", 272 | "Hash": "97def703420c8ab10d8f0e6c72101e02" 273 | }, 274 | "readr": { 275 | "Package": "readr", 276 | "Version": "2.0.2", 277 | "Source": "Repository", 278 | "Repository": "CRAN", 279 | "Hash": "7cb2c3ecfbc2c6786221d2c0c1f6ed68" 280 | }, 281 | "renv": { 282 | "Package": "renv", 283 | "Version": "0.14.0", 284 | "Source": "Repository", 285 | "Repository": "CRAN", 286 | "Hash": "30e5eba91b67f7f4d75d31de14bbfbdc" 287 | }, 288 | "rlang": { 289 | "Package": "rlang", 290 | "Version": "0.4.12", 291 | "Source": "Repository", 292 | "Repository": "CRAN", 293 | "Hash": "0879f5388fe6e4d56d7ef0b7ccb031e5" 294 | }, 295 | "rmarkdown": { 296 | "Package": "rmarkdown", 297 | "Version": "2.11", 298 | "Source": "Repository", 299 | "Repository": "CRAN", 300 | "Hash": "320017b52d05a943981272b295750388" 301 | }, 302 | "stringi": { 303 | "Package": "stringi", 304 | "Version": "1.7.5", 305 | "Source": "Repository", 306 | "Repository": "CRAN", 307 | "Hash": "cd50dc9b449de3d3b47cdc9976886999" 308 | }, 309 | "stringr": { 310 | "Package": "stringr", 311 | "Version": "1.4.0", 312 | "Source": "Repository", 313 | "Repository": "CRAN", 314 | "Hash": "0759e6b6c0957edb1311028a49a35e76" 315 | }, 316 | "systemfonts": { 317 | "Package": "systemfonts", 318 | "Version": "1.0.3", 319 | "Source": "Repository", 320 | "Repository": "CRAN", 321 | "Hash": "5be9fcf8ef6763e8cb13ab009e273a1d" 322 | }, 323 | "tibble": { 324 | "Package": "tibble", 325 | "Version": "3.1.5", 326 | "Source": "Repository", 327 | "Repository": "CRAN", 328 | "Hash": "36eb05ad4cfdfeaa56f5a9b2a1311efd" 329 | }, 330 | "tidyr": { 331 | "Package": "tidyr", 332 | "Version": "1.1.4", 333 | "Source": "Repository", 334 | "Repository": "CRAN", 335 | "Hash": "c8fbdbd9fcac223d6c6fe8e406f368e1" 336 | }, 337 | "tidyselect": { 338 | "Package": "tidyselect", 339 | "Version": "1.1.1", 340 | "Source": "Repository", 341 | 
"Repository": "CRAN", 342 | "Hash": "7243004a708d06d4716717fa1ff5b2fe" 343 | }, 344 | "tinytex": { 345 | "Package": "tinytex", 346 | "Version": "0.34", 347 | "Source": "Repository", 348 | "Repository": "CRAN", 349 | "Hash": "043daa786f4d254f0031534150e28d42" 350 | }, 351 | "tzdb": { 352 | "Package": "tzdb", 353 | "Version": "0.2.0", 354 | "Source": "Repository", 355 | "Repository": "CRAN", 356 | "Hash": "5e069fb033daf2317bd628d3100b75c5" 357 | }, 358 | "utf8": { 359 | "Package": "utf8", 360 | "Version": "1.2.2", 361 | "Source": "Repository", 362 | "Repository": "CRAN", 363 | "Hash": "c9c462b759a5cc844ae25b5942654d13" 364 | }, 365 | "uuid": { 366 | "Package": "uuid", 367 | "Version": "1.0-2", 368 | "Source": "Repository", 369 | "Repository": "CRAN", 370 | "Hash": "a24851e188aeed413d469d7f4f229000" 371 | }, 372 | "vctrs": { 373 | "Package": "vctrs", 374 | "Version": "0.3.8", 375 | "Source": "Repository", 376 | "Repository": "CRAN", 377 | "Hash": "ecf749a1b39ea72bd9b51b76292261f1" 378 | }, 379 | "vroom": { 380 | "Package": "vroom", 381 | "Version": "1.5.5", 382 | "Source": "Repository", 383 | "Repository": "CRAN", 384 | "Hash": "9c3b3a3f947c7936cea7485349247e5b" 385 | }, 386 | "withr": { 387 | "Package": "withr", 388 | "Version": "2.4.2", 389 | "Source": "Repository", 390 | "Repository": "CRAN", 391 | "Hash": "ad03909b44677f930fa156d47d7a3aeb" 392 | }, 393 | "xfun": { 394 | "Package": "xfun", 395 | "Version": "0.27", 396 | "Source": "Repository", 397 | "Repository": "CRAN", 398 | "Hash": "12b69332f085d350fc1f2ea6cca58397" 399 | }, 400 | "xml2": { 401 | "Package": "xml2", 402 | "Version": "1.3.2", 403 | "Source": "Repository", 404 | "Repository": "CRAN", 405 | "Hash": "d4d71a75dd3ea9eb5fa28cc21f9585e2" 406 | }, 407 | "yaml": { 408 | "Package": "yaml", 409 | "Version": "2.2.1", 410 | "Source": "Repository", 411 | "Repository": "CRAN", 412 | "Hash": "2826c5d9efb0a88f657c7a679c7106db" 413 | }, 414 | "zip": { 415 | "Package": "zip", 416 | "Version": "2.2.0", 417 | 
"Source": "Repository", 418 | "Repository": "CRAN", 419 | "Hash": "c7eef2996ac270a18c2715c997a727c5" 420 | } 421 | } 422 | } 423 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Attribution-NonCommercial 4.0 International Public License 2 | 3 | By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-NonCommercial 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. 4 | 5 | Section 1 – Definitions. 6 | 7 | Adapted Material means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. 8 | Adapter's License means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. 
9 | Copyright and Similar Rights means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. 10 | Effective Technological Measures means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. 11 | Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. 12 | Licensed Material means the artistic or literary work, database, or other material to which the Licensor applied this Public License. 13 | Licensed Rights means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. 14 | Licensor means the individual(s) or entity(ies) granting rights under this Public License. 15 | NonCommercial means not primarily intended for or directed towards commercial advantage or monetary compensation. For purposes of this Public License, the exchange of the Licensed Material for other material subject to Copyright and Similar Rights by digital file-sharing or similar means is NonCommercial provided there is no payment of monetary compensation in connection with the exchange. 
16 | Share means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. 17 | Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. 18 | You means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. 19 | Section 2 – Scope. 20 | 21 | License grant. 22 | Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: 23 | reproduce and Share the Licensed Material, in whole or in part, for NonCommercial purposes only; and 24 | produce, reproduce, and Share Adapted Material for NonCommercial purposes only. 25 | Exceptions and Limitations. For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. 26 | Term. The term of this Public License is specified in Section 6(a). 27 | Media and formats; technical modifications allowed. The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. 
The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material. 28 | Downstream recipients. 29 | Offer from the Licensor – Licensed Material. Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. 30 | No downstream restrictions. You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. 31 | No endorsement. Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). 32 | Other rights. 33 | 34 | Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. 35 | Patent and trademark rights are not licensed under this Public License. 
36 | To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties, including when the Licensed Material is used other than for NonCommercial purposes. 37 | Section 3 – License Conditions. 38 | 39 | Your exercise of the Licensed Rights is expressly made subject to the following conditions. 40 | 41 | Attribution. 42 | 43 | If You Share the Licensed Material (including in modified form), You must: 44 | 45 | retain the following if it is supplied by the Licensor with the Licensed Material: 46 | identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); 47 | a copyright notice; 48 | a notice that refers to this Public License; 49 | a notice that refers to the disclaimer of warranties; 50 | a URI or hyperlink to the Licensed Material to the extent reasonably practicable; 51 | indicate if You modified the Licensed Material and retain an indication of any previous modifications; and 52 | indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. 53 | You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. 54 | If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. 
55 | If You Share Adapted Material You produce, the Adapter's License You apply must not prevent recipients of the Adapted Material from complying with this Public License. 56 | Section 4 – Sui Generis Database Rights. 57 | 58 | Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: 59 | 60 | for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database for NonCommercial purposes only; 61 | if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and 62 | You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. 63 | For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. 64 | Section 5 – Disclaimer of Warranties and Limitation of Liability. 65 | 66 | Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You. 
67 | To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You. 68 | The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. 69 | Section 6 – Term and Termination. 70 | 71 | This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. 72 | Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: 73 | 74 | automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or 75 | upon express reinstatement by the Licensor. 76 | For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. 77 | For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. 78 | Sections 1, 5, 6, 7, and 8 survive termination of this Public License. 79 | Section 7 – Other Terms and Conditions. 80 | 81 | The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. 
82 | Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. 83 | Section 8 – Interpretation. 84 | 85 | For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. 86 | To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. 87 | No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. 88 | Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. 89 | --------------------------------------------------------------------------------