├── internal
    ├── os
    │   └── elf
    │   │   ├── section.go
    │   │   ├── debug.go
    │   │   ├── prog.go
    │   │   ├── writer.go
    │   │   ├── reader.go
    │   │   └── file.go
    ├── prog
    │   ├── file.go
    │   └── error.go
    ├── arch
    │   └── arch.go
    ├── utils
    │   └── convert.go
    └── reader
    │   ├── string.go
    │   ├── reader.go
    │   ├── decimal_test.go
    │   ├── reader_test.go
    │   └── decimal.go
├── go.mod
├── .gitignore
├── compiler
    ├── assemble
    │   ├── internal
    │   │   ├── instr.go
    │   │   ├── program.go
    │   │   ├── operand.go
    │   │   ├── label.go
    │   │   ├── lexer.go
    │   │   ├── token.go
    │   │   └── parser.go
    │   └── main.go
    ├── compile
    │   ├── parser
    │   │   ├── parser_block.go
    │   │   ├── lexer.go
    │   │   ├── params.go
    │   │   ├── parser.go
    │   │   ├── types.go
    │   │   └── parser_exp.go
    │   ├── main.go
    │   ├── ast
    │   │   └── scope.go
    │   └── token
    │   │   ├── token.go
    │   │   └── file.go
    └── link
    │   └── main.go
├── go.sum
├── LICENSE
├── README.md
└── docs
    ├── ascii.md
    └── 汇编语法详解.md


/internal/os/elf/section.go:
--------------------------------------------------------------------------------
1 | package elf
2 | 
// Section describes one section by its name and where its content lives.
type Section struct {
	// Name is the section name.
	Name string
	// Offset is the section offset.
	Offset int
	// Length is the size of the section content.
	Length int
}
9 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
 1 | module github.com/facelang/face
 2 | 
 3 | go 1.24
 4 | 
 5 | require (
 6 | 	github.com/olekukonko/tablewriter v0.0.5
 7 | 	golang.org/x/arch v0.17.0
 8 | )
 9 | 
10 | require github.com/mattn/go-runewidth v0.0.9 // indirect
11 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | 
 2 | .idea/
 3 | .vscode/
 4 | .DS_Store
 5 | 
 6 | **/go.sum
 7 | **/*.exe
 8 | **/*.exe~
 9 | **/*.dll
10 | **/*.so
11 | **/*.dylib
12 | **/*.exe
13 | 
14 | # 编译输出
15 | /bin/
16 | /pkg/
17 | /pkgs/
18 | /compiler/provider/
19 | /example/
20 | /tools/


--------------------------------------------------------------------------------
/compiler/assemble/internal/instr.go:
--------------------------------------------------------------------------------
 1 | package internal
 2 | 
 3 | // instr 表示一条汇编指令
 4 | type instr struct {
 5 | 	Opcode string    // 操作码
 6 | 	Src    *operand  // 源操作数
 7 | 	Dst    *operand  // 目标操作数
 8 | 	Size   int       // 操作数大小(byte/word/dword/qword)
 9 | }
10 | 


--------------------------------------------------------------------------------
/internal/prog/file.go:
--------------------------------------------------------------------------------
 1 | package prog
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | )
 6 | 
 7 | type FilePos struct {
 8 | 	Filename          string
 9 | 	Col, Line, Offset int
10 | }
11 | 
12 | func (i *FilePos) String() string {
13 | 	return fmt.Sprintf("行: %d, 列: %d, 文件名：%s", i.Line+1, i.Col+1, i.Filename)
14 | }
15 | 


--------------------------------------------------------------------------------
/compiler/compile/parser/parser_block.go:
--------------------------------------------------------------------------------
 1 | package parser
 2 | 
 3 | // ----------------------------------------------------------------------------
 4 | // Blocks
 5 | 
 6 | // 与函数 parseBlockStmt 完全等价！
 7 | //func (p *parser) parseBody() *ast.BlockStmt {
 8 | //	lbrace := p.expect(LBRACE) // {
 9 | //	list := p.parseStmtList()
10 | //	rbrace := p.expect(RBRACE) // }
11 | //
12 | //	return &ast.BlockStmt{Lbrace: lbrace, List: list, Rbrace: rbrace}
13 | //}
14 | 


--------------------------------------------------------------------------------
/compiler/assemble/internal/program.go:
--------------------------------------------------------------------------------
 1 | package internal
 2 | 
 3 | type ProgType byte
 4 | 
 5 | const (
 6 | 	Unknown ProgType = iota
 7 | 	Instr            // 指令
 8 | 	Label            // 符号定义
 9 | 	Section          // 段标记
10 | 	Global           // 全局符号
11 | 	Local            // 本地符号
12 | 	Type             // .type 指定类型
13 | 	Size             // .size 指定大小
14 | )
15 | 
16 | type Program struct {
17 | 	Type ProgType
18 | 	Name string
19 | 	Pc   int64
20 | }
21 | 


--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
2 | github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
3 | github.com/olekukonko/tablewriter v0.0.5 h1:P2Ga83D34wi1o9J6Wh1mRuqd4mF/x/lgBS7N7AbDhec=
4 | github.com/olekukonko/tablewriter v0.0.5/go.mod h1:hPp6KlRPjbx+hW8ykQs1w3UBbZlj6HuIJcUGPhkA7kY=
5 | golang.org/x/arch v0.17.0 h1:4O3dfLzd+lQewptAHqjewQZQDyEdejz3VwgeYwkZneU=
6 | golang.org/x/arch v0.17.0/go.mod h1:bdwinDaKcfZUGpH09BB7ZmOfhalA8lQdzl62l8gGWsk=
7 | 


--------------------------------------------------------------------------------
/compiler/link/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"face-lang/compiler/provider/link"
 5 | 	"fmt"
 6 | )
 7 | 
 8 | func main() {
 9 | 	//buf, err := os.ReadFile("common.t")
10 | 	//if err != nil {
11 | 	//	panic(err)
12 | 	//}
13 | 	//for _, b := range buf {
14 | 	//	fmt.Printf("%d, ", b)
15 | 	//}
16 | 	//_ = asm.Program("example/hello.s")
17 | 	//println("完成编译！")
18 | 	//file, _ := elf.ReadElf("common.o")
19 | 	//file.Objdump()
20 | 
21 | 	err := link.Link("example/hello", "example/common.s.o", "example/hello.s.o")
22 | 	if err != nil {
23 | 		panic(err)
24 | 	}
25 | 	fmt.Println("链接完成！")
26 | 
27 | }
28 | 


--------------------------------------------------------------------------------
/internal/arch/arch.go:
--------------------------------------------------------------------------------
 1 | package arch
 2 | 
 3 | // Arch wraps the link architecture object with more architecture-specific information.
 4 | type Arch struct {
 5 | 	//*obj.LinkArch
 6 | 	// Map of instruction names to enumeration.
 7 | 	InstrTable map[string]obj.As
 8 | 	// Map of register names to enumeration.
 9 | 	Register map[string]int16
10 | 	// Table of register prefix names. These are things like R for R(0) and SPR for SPR(268).
11 | 	RegisterPrefix map[string]bool
12 | 	// RegisterNumber converts R(10) into arm.REG_R10.
13 | 	RegisterNumber func(string, int16) (int16, bool)
14 | 	// Instruction is a jump.
15 | 	IsJump func(word string) bool
16 | }
17 | 


--------------------------------------------------------------------------------
/compiler/assemble/internal/operand.go:
--------------------------------------------------------------------------------
 1 | package internal
 2 | 
 3 | // 操作数类型
 4 | const (
 5 | 	REGISTER  = iota // 寄存器
 6 | 	IMMEDIATE        // 立即数
 7 | 	MEMORY          // 内存引用
 8 | 	SYMBOL          // 符号/标签
 9 | )
10 | 
// ExpType identifies the operator of an expression term.
type ExpType byte

// Expression operators. Each operator has a distinct value so that addition
// and subtraction can be told apart.
//
// NOTE(review): the original declared EXP_ADD three times, which does not
// compile. The extra declarations look like placeholders for further
// operators (for example multiply/divide) — add them under their own names
// once the intended set is confirmed.
const (
	EXP_ADD ExpType = 1 // addition
	EXP_SUB ExpType = 2 // subtraction
)
17 | 
// OprType is the encoded category of an operand.
type OprType byte

// Operand categories, numbered from 1.
const (
	OPRTP_IMM OprType = iota + 1 // immediate
	OPRTP_REG                    // register
	OPRTP_MEM                    // address type; requires addressing
	OPRTP_REL                    // symbol type; requires relocation
)
24 | 
// Operand is implemented by every operand representation. The unexported
// marker method restricts implementations to this package.
type Operand interface {
	// operand is a marker method with no behavior.
	operand()
}
28 | 
// Express is a placeholder for expression terms; it currently has no methods,
// so any value satisfies it.
type Express interface{}
31 | 
// operand represents one instruction operand.
type operand struct {
	// Type is the operand kind.
	Type int
	// Value is the operand value.
	Value string
	// Base is the base register (for memory references).
	Base string
	// Index is the index register (for memory references).
	Index string
	// Scale is the scale factor (for memory references).
	Scale int
}
40 | 
41 | type ExpOpr struct {
42 | 	ExpList []Express
43 | }
44 | 
45 | type GenOpr struct {
46 | 	Type   OprType // 操作数类型(1 立即数， 2寄存器, 4寻址类型（表示会用到 ModRM 字段） )
47 | 	Value  int64   // 立即数？地址
48 | 	Length int     // 操作数宽度
49 | }
50 | 
// RelOpr is an operand that refers to a symbol by name.
type RelOpr struct {
	// Label is the symbol name.
	Label string
}
54 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Creative Commons Attribution 4.0 International License
 2 | 
 3 | Copyright (c) 2024 Facelang
 4 | 
 5 | This work is licensed under the Creative Commons Attribution 4.0 International License. To view a copy of this license, visit http://creativecommons.org/licenses/by/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
 6 | 
 7 | You are free to:
 8 | - Share — copy and redistribute the material in any medium or format
 9 | - Adapt — remix, transform, and build upon the material for any purpose, even commercially
10 | 
11 | Under the following terms:
12 | - Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
13 | 
14 | No additional restrictions — You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits.
15 | 
16 | Notices:
17 | - You do not have to comply with the license for elements of the material in the public domain or where your use is permitted by an applicable exception or limitation.
18 | - No warranties are given. The license may not give you all of the permissions necessary for your intended use. For example, other rights such as publicity, privacy, or moral rights may limit how you use the material. 


--------------------------------------------------------------------------------
/compiler/assemble/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"flag"
 5 | 	"fmt"
 6 | 	"github.com/facelang/face/compiler/assemble/internal"
 7 | 	"github.com/facelang/face/compiler/provider/asm"
 8 | 	"github.com/facelang/face/internal/os/elf"
 9 | 	"os"
10 | 	"path/filepath"
11 | 	"strings"
12 | )
13 | 
// Debug enables debug mode; it is off by default.
var Debug = flag.Bool("debug", false, "启用调试模式，默认不启用")

// OutputFile is the output path; when empty it is derived from the input file.
var OutputFile = flag.String("o", "", "输出文件，默认跟输入文件保持一致")

// TODO: allow a target platform to be specified to support cross-compilation.
19 | 
// Usage prints the command synopsis and the registered flags to standard
// error, then terminates the process with exit status 2.
func Usage() {
	fmt.Fprint(os.Stderr, "usage: asm [options] file.s ...\n")
	fmt.Fprint(os.Stderr, "Flags:\n")
	flag.PrintDefaults()
	os.Exit(2)
}
26 | 
27 | func main() {
28 | 	if flag.NArg() == 0 {
29 | 		flag.Usage()
30 | 	}
31 | 
32 | 	if *OutputFile == "" {
33 | 		if flag.NArg() != 1 {
34 | 			flag.Usage()
35 | 		}
36 | 		input := filepath.Base(flag.Arg(0))
37 | 		input = strings.TrimSuffix(input, ".s")
38 | 		*OutputFile = fmt.Sprintf("%s.o", input)
39 | 	}
40 | 
41 | 	for _, f := range flag.Args() {
42 | 		lexer := internal.NewLexer(f)
43 | 		parser := internal.NewParser(lexer)
44 | 		pList := new(obj.Plist)
45 | 		pList.Firstpc, ok = parser.Parse() // p.firstProg
46 | 
47 | 		obj.Flushplist(ctxt, pList, nil)
48 | 	}
49 | 
50 | 	buf, err := os.ReadFile("common.t")
51 | 	if err != nil {
52 | 		panic(err)
53 | 	}
54 | 	for _, b := range buf {
55 | 		fmt.Printf("%d, ", b)
56 | 	}
57 | 	_ = asm.Program("example/hello.s")
58 | 	println("完成编译！")
59 | 	file, _ := elf.ReadElf("common.o")
60 | 	file.Objdump()
61 | 
62 | }
63 | 


--------------------------------------------------------------------------------
/compiler/compile/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"flag"
 5 | 	"fmt"
 6 | 	"github.com/facelang/face/compiler/compile/internal"
 7 | 	"github.com/facelang/face/compiler/compile/parser"
 8 | 	"github.com/facelang/face/compiler/provider/asm"
 9 | 	"github.com/facelang/face/internal/os/elf"
10 | 	"os"
11 | 	"path/filepath"
12 | 	"strings"
13 | )
14 | 
// Debug enables debug mode; it is off by default.
var Debug = flag.Bool("debug", false, "启用调试模式，默认不启用")

// OutputFile is the output path; when empty it is derived from the input file.
var OutputFile = flag.String("o", "", "输出文件，默认跟输入文件保持一致")

// TODO: allow a target platform to be specified to support cross-compilation.
20 | 
// Usage prints the command synopsis and the registered flags to standard
// error, then terminates the process with exit status 2.
//
// NOTE(review): the synopsis still says "asm ... file.s" although this file
// lives under compiler/compile — confirm the intended wording.
func Usage() {
	fmt.Fprint(os.Stderr, "usage: asm [options] file.s ...\n")
	fmt.Fprint(os.Stderr, "Flags:\n")
	flag.PrintDefaults()
	os.Exit(2)
}
27 | 
28 | func main() {
29 | 	if flag.NArg() == 0 {
30 | 		flag.Usage()
31 | 	}
32 | 
33 | 	if *OutputFile == "" {
34 | 		if flag.NArg() != 1 {
35 | 			flag.Usage()
36 | 		}
37 | 		input := filepath.Base(flag.Arg(0))
38 | 		input = strings.TrimSuffix(input, ".s")
39 | 		*OutputFile = fmt.Sprintf("%s.o", input)
40 | 	}
41 | 
42 | 	for _, f := range flag.Args() {
43 | 		lexer := parser.NewLexer(f)
44 | 		parser := internal.NewParser(lexer)
45 | 		pList := new(obj.Plist)
46 | 		pList.Firstpc, ok = parser.Parse() // p.firstProg
47 | 
48 | 		obj.Flushplist(ctxt, pList, nil)
49 | 	}
50 | 
51 | 	buf, err := os.ReadFile("common.t")
52 | 	if err != nil {
53 | 		panic(err)
54 | 	}
55 | 	for _, b := range buf {
56 | 		fmt.Printf("%d, ", b)
57 | 	}
58 | 	_ = asm.Program("example/hello.s")
59 | 	println("完成编译！")
60 | 	file, _ := elf.ReadElf("common.o")
61 | 	file.Objdump()
62 | 
63 | }
64 | 


--------------------------------------------------------------------------------
/internal/utils/convert.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import (
 4 | 	"encoding/binary"
 5 | 	"math"
 6 | 	"strconv"
 7 | )
 8 | 
 9 | func Float2Bytes(val float64) []byte {
10 | 	buf := make([]byte, 8)
11 | 	binary.LittleEndian.PutUint64(buf, math.Float64bits(val))
12 | 	return buf
13 | }
14 | 
// Float parses lit as a 64-bit floating-point number. It panics with a string
// message when lit cannot be parsed.
func Float(lit string) float64 {
	if val, err := strconv.ParseFloat(lit, 64); err == nil {
		return val
	}
	panic("无效的浮点数: " + lit)
}
22 | 
23 | func FloatBytes(lit string) []byte {
24 | 	return Float2Bytes(Float(lit))
25 | }
26 | 
// Int2Bytes returns val as 8 little-endian bytes (two's complement).
func Int2Bytes(val int64) []byte {
	var raw [8]byte
	binary.LittleEndian.PutUint64(raw[:], uint64(val))
	return raw[:]
}
32 | 
// Int parses an integer literal and returns its value.
//
// Supported forms: decimal ("42"), binary ("0b101"), octal ("0o17" or the
// legacy leading-zero form "017") and hexadecimal ("0x1F"); prefix letters may
// be upper or lower case. The empty string and "0" both yield 0.
//
// The result type is int64: the function always computed an int64, and
// IntBytes feeds the result to Int2Bytes, so the former []byte result type
// could not compile.
//
// Int panics with a string message naming the radix when the digits are
// invalid for that radix or do not fit in 64 bits.
func Int(lit string) int64 {
	if lit == "" || lit == "0" {
		return 0
	}

	// Default to decimal; a leading '0' selects another radix.
	digits, base, radix := lit, 10, "十进制"
	if lit[0] == '0' {
		// len(lit) >= 2 here because "0" was handled above.
		switch lit[1] {
		case 'b', 'B':
			digits, base, radix = lit[2:], 2, "二进制"
		case 'x', 'X':
			digits, base, radix = lit[2:], 16, "十六进制"
		case 'o', 'O':
			digits, base, radix = lit[2:], 8, "八进制"
		default:
			// Legacy octal: the whole literal, leading zero included.
			base, radix = 8, "八进制"
		}
	}

	val, err := strconv.ParseInt(digits, base, 64)
	if err != nil {
		panic("无效的" + radix + "数字: " + lit)
	}
	return val
}
78 | 
79 | func IntBytes(lit string) []byte {
80 | 	return Int2Bytes(Int(lit))
81 | }
82 | 


--------------------------------------------------------------------------------
/compiler/assemble/internal/label.go:
--------------------------------------------------------------------------------
 1 | package internal
 2 | 
 3 | import "fmt"
 4 | 
 5 | type LabelType uint8
 6 | 
 7 | const UNDEFINED_LABEL LabelType = 0 // 未定义
 8 | const TEXT_LABEL LabelType = 1      // 代码段符号
 9 | const EQU_LABEL LabelType = 2       // 常量
10 | const LOCAL_LABEL LabelType = 3     // 局部变量
11 | const EXTERNAL_LABEL LabelType = 4  // 外部变量, 提前申明的
12 | 
13 | type label struct {
14 | 	Name    string    // 标签名
15 | 	Type    LabelType // 标签类型
16 | 	Addr    int       // 地址
17 | 	Index   int       // 添加顺序， 从1开始
18 | 	Section string    // 段名
19 | 	Times   int       // 重复次数
20 | 	Size    int       // 字节长度
21 | 	Cont    []int     // 内容
22 | 	ContLen int       // 内容长度
23 | 	RelInfo bool      // 记录重定位信息
24 | }
25 | 
26 | // AddLabel 添加符号到符号表; 一共三处，equ 常量 仅数字 NewRecWithEqu， 变量 NewRecWithData,  代码段 TextLabel
27 | func (p *parser) AddLabel(name string, rec *label) {
28 | 	rec.Name = name // 缓存一次，减少后续查找名字
29 | 	if rec.Type == TEXT_LABEL || rec.Type == LOCAL_LABEL {
30 | 		rec.Addr = p.seg.Offset
31 | 		rec.Section = p.seg.Name
32 | 	}
33 | 
34 | 	// 更新地址, 除了具体的变量定义，这里都是 0， 没有变化
35 | 	p.seg.Offset += rec.Times * rec.Size * rec.ContLen
36 | 
37 | 	if i, ok := p.labelNames[name]; ok {
38 | 		labelRec := p.labelList[i]
39 | 		if labelRec.Type == UNDEFINED_LABEL {
40 | 			p.labelList[i] = rec // 直接替换
41 | 		} else {
42 | 			_ = fmt.Errorf("符号: %s 重复定义！", name)
43 | 		}
44 | 	} else {
45 | 		p.labelList = append(p.labelList, rec)
46 | 		p.labelNames[name] = len(p.labelList) - 1
47 | 	}
48 | }
49 | 
50 | // GetLabel 获取符号
51 | func (p *parser) GetLabel(name string) *label {
52 | 	if i, ok := p.labelNames[name]; ok {
53 | 		return p.labelList[i]
54 | 	}
55 | 
56 | 	// 只有符号引用符号时， 才会被创建
57 | 	// 未知符号，添加为外部符号(待重定位)
58 | 	rec := NewLabel(UNDEFINED_LABEL)
59 | 	rec.Name = name
60 | 	p.labelList = append(p.labelList, rec)
61 | 	p.labelNames[name] = len(p.labelList) - 1
62 | 	return rec
63 | }
64 | 
65 | func NewLabel(lType LabelType) *label {
66 | 	return &label{Type: lType}
67 | }
68 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Facelang
  2 | 
  3 | Facelang 是一个全新编程语言项目。该项目期望打造一门专为全栈工程师量身定制的跨端编译语言，目前主要基于 Go 实现。
  4 | 
  5 | 下一阶段，会优先使用 LLVM 完成语言的整体功能，确保语言基本可用。
  6 | 
  7 | 后期仍然计划参考 Go 实现更独立，更完善的汇编器和链接器，实现轻量化的编译。
  8 | 
  9 | 该项目目前属于个人维护，大部分功能实现完成度不高，开源的目的是希望对此项目感兴趣的朋友可以一起加入学习、探讨。
 10 | 
 11 | 项目中可能会出现诸多问题，还望大家多多包涵。
 12 | 
 13 | 🤘 🤘 🤘 自嗨 ing ...
 14 | 
 15 | 致歉。
 16 | 
 17 | ## 🚪 快速上手
 18 | 
 19 | - COMING SOON ...
 20 | 
 21 | ## 🚀 项目特性
 22 | 
 23 | - 完整的编译器实现
 24 | - 汇编器支持
 25 | - 链接器功能
 26 | - 跨平台支持
 27 | - 跨端GUI支持
 28 | - 底层原理学习
 29 | 
 30 | ## 🛠️ 技术栈
 31 | 
 32 | - Go 语言
 33 | - x86 汇编
 34 | - Arm 汇编
 35 | - LLVM
 36 | - C/C++
 37 | 
 38 | ## 📚 项目结构
 39 | 
 40 | ```
 41 | face-lang/
 42 | ├── compiler/     # 编译器实现
 43 | ├── docs/         # 文档
 44 | ├── example/      # 示例代码
 45 | ├── internal/     # 核心代码
 46 | ├── library/      # 标准库
 47 | └── tools/        # 反汇编工具
 48 | ```
 49 | 
 50 | ## 🎯 项目目标
 51 | 
 52 | 1. 实现一门完整的编程语言
 53 | 2. 提供跨平台编译支持
 54 | 3. 为全栈工程师提供高效的开发体验
 55 | 4. 探索和学习程序运行的底层原理
 56 | 
 57 | ## 🏃‍♂‍➡ 开发进度
 58 | 
 59 | - [x] Linux 平台支持
 60 | - [ ] macOS 平台支持
 61 | - [ ] Windows 平台支持
 62 | - [x] 基础汇编指令支持 `mov`、`cmp`、`sub`、`add`、`lea`、`call`、`int`、`imul`、`idiv`、`neg`、`inc`、`dec`、`jmp`、`je`、`jg`、`jl`、`jle`、`jne`、`jna`、`push`、`pop`
 63 | - [ ] 基于 LLVM 实现
 64 | - [ ] 文档完善
 65 | - [ ] 其它汇编指令支持
 66 | - [ ] 标准库完善
 67 | - [ ] 性能优化
 68 | 
 69 | ## 🤝 参与贡献
 70 | 
 71 | 欢迎所有对编程语言实现感兴趣的朋友参与项目开发！您可以通过以下方式参与：
 72 | 
 73 | 1. 提交 Issue 报告问题或建议
 74 | 2. 提交 Pull Request 贡献代码
 75 | 3. 完善项目文档
 76 | 4. 分享使用经验
 77 | 
 78 | ## 📝 项目笔记
 79 | 
 80 | TODO
 81 | 
 82 | ## 🔮 未来规划
 83 | 
 84 | - [ ] 实现更多语言特性
 85 | - [ ] 优化编译性能
 86 | - [ ] 提供更多平台支持
 87 | - [ ] 完善开发工具链
 88 | - [ ] 建立活跃的社区
 89 | 
 90 | ## 📄 开源协议
 91 | 
 92 | 本项目采用 [Creative Commons Attribution 4.0 International License](LICENSE) 协议开源。
 93 | 
 94 | 根据该协议，您可以：
 95 | - 自由分享和分发本项目
 96 | - 自由修改和构建本项目
 97 | - 用于任何目的，包括商业用途
 98 | 
 99 | 主要要求：
100 | - 必须注明原作者
101 | - 必须提供许可证链接
102 | - 必须说明是否做了修改
103 | 
104 | ## 🌟 致谢
105 | 
106 | 感谢所有为项目做出贡献的开发者！
107 | 
108 | ---
109 | 
110 | 欢迎关注项目，一起探索编程语言的奥秘！ 


--------------------------------------------------------------------------------
/internal/reader/string.go:
--------------------------------------------------------------------------------
 1 | package reader
 2 | 
 3 | import "fmt"
 4 | 
 5 | func Char(r *Reader) string {
 6 | 	ident, l := String(r, '\'')
 7 | 	if l != 1 {
 8 | 		panic(fmt.Errorf("invalid char literal"))
 9 | 	}
10 | 	return ident
11 | }
12 | 
13 | func String(r *Reader, quote byte) (string, int) {
14 | 	length := 0
15 | 	ch, ok := r.ReadByte() // read character after quote
16 | 	for ch != quote {
17 | 		if ch == '\n' || !ok {
18 | 			panic(fmt.Errorf("literal not terminated"))
19 | 		}
20 | 		if ch == '\\' {
21 | 			ch = escape(r, quote)
22 | 		} else {
23 | 			ch, ok = r.ReadByte()
24 | 		}
25 | 		length++
26 | 	}
27 | 	return r.ReadText(), length
28 | }
29 | 
30 | func RawString(r *Reader) string {
31 | 	ch, ok := r.ReadByte() // read character after '`'
32 | 	for ch != '`' {
33 | 		if !ok {
34 | 			panic(fmt.Errorf("literal not terminated"))
35 | 		}
36 | 		ch, ok = r.ReadByte()
37 | 	}
38 | 	return r.ReadText()
39 | }
40 | 
41 | // Comment 单行注释
42 | func Comment(r *Reader) string {
43 | 	ch, ok := r.ReadByte() // read character after "//"
44 | 	for ok && ch != '\n' {
45 | 		ch, ok = r.ReadByte()
46 | 	}
47 | 	r.GoBack()
48 | 	return r.ReadText()
49 | }
50 | 
51 | // 处理转义字符
52 | func escape(r *Reader, quote byte) byte {
53 | 	ch, _ := r.ReadByte() // read character after '/'
54 | 	switch ch {
55 | 	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
56 | 		// 常见的转义字符， 只需要读一个字符即可
57 | 		ch, _ = r.ReadByte()
58 | 	case '0', '1', '2', '3', '4', '5', '6', '7':
59 | 		// 处理形如 \123 的八进制转义序列
60 | 		// 最多读取 3 位八进制数字
61 | 		ch = number(r, ch, 8, 3)
62 | 	case 'x':
63 | 		ch, _ = r.ReadByte()
64 | 		ch = number(r, ch, 16, 2)
65 | 	case 'u':
66 | 		ch, _ = r.ReadByte()
67 | 		ch = number(r, ch, 16, 4)
68 | 	case 'U':
69 | 		ch, _ = r.ReadByte()
70 | 		ch = number(r, ch, 16, 8)
71 | 	default:
72 | 		panic(fmt.Errorf("invalid char escape"))
73 | 	}
74 | 	return ch
75 | }
76 | 
77 | // 处理数字部分
78 | func number(r *Reader, ch byte, base, n int) byte {
79 | 	for n > 0 && digitVal(ch) < base {
80 | 		ch, _ = r.ReadByte()
81 | 		n--
82 | 	}
83 | 	if n > 0 {
84 | 		panic(fmt.Errorf("invalid char escape"))
85 | 	}
86 | 	return ch
87 | }
88 | 


--------------------------------------------------------------------------------
/internal/os/elf/debug.go:
--------------------------------------------------------------------------------
  1 | package elf
  2 | 
  3 | import (
  4 | 	"strings"
  5 | )
  6 | 
  7 | //type intName struct {
  8 | //	i uint64
  9 | //	s string
 10 | //}
 11 | //
 12 | //func stringName(i uint64, names []intName, goSyntax bool) string {
 13 | //	for _, n := range names {
 14 | //		if n.i == i {
 15 | //			if goSyntax {
 16 | //				return "elf." + n.s
 17 | //			}
 18 | //			return n.s
 19 | //		}
 20 | //	}
 21 | //
 22 | //	// second pass - look for smaller to add with.
 23 | //	// assume sorted already
 24 | //	for j := len(names) - 1; j >= 0; j-- {
 25 | //		n := names[j]
 26 | //		if n.i < i {
 27 | //			s := n.s
 28 | //			if goSyntax {
 29 | //				s = "elf." + s
 30 | //			}
 31 | //			return s + "+" + strconv.FormatUint(uint64(i-n.i), 10)
 32 | //		}
 33 | //	}
 34 | //
 35 | //	return strconv.FormatUint(uint64(i), 10)
 36 | //}
 37 | //
 38 | //var shtStrings = []intName{
 39 | //	{0, "SHT_NULL"},
 40 | //	{1, "SHT_PROGBITS"},
 41 | //	{2, "SHT_SYMTAB"},
 42 | //	{3, "SHT_STRTAB"},
 43 | //	{4, "SHT_RELA"},
 44 | //	{5, "SHT_HASH"},
 45 | //	{6, "SHT_DYNAMIC"},
 46 | //	{7, "SHT_NOTE"},
 47 | //	{8, "SHT_NOBITS"},
 48 | //	{9, "SHT_REL"},
 49 | //	{10, "SHT_SHLIB"},
 50 | //	{11, "SHT_DYNSYM"},
 51 | //	{14, "SHT_INIT_ARRAY"},
 52 | //	{15, "SHT_FINI_ARRAY"},
 53 | //	{16, "SHT_PREINIT_ARRAY"},
 54 | //	{17, "SHT_GROUP"},
 55 | //	{18, "SHT_SYMTAB_SHNDX"},
 56 | //	{0x60000000, "SHT_LOOS"},
 57 | //	{0x6ffffff5, "SHT_GNU_ATTRIBUTES"},
 58 | //	{0x6ffffff6, "SHT_GNU_HASH"},
 59 | //	{0x6ffffff7, "SHT_GNU_LIBLIST"},
 60 | //	{0x6ffffffd, "SHT_GNU_VERDEF"},
 61 | //	{0x6ffffffe, "SHT_GNU_VERNEED"},
 62 | //	{0x6fffffff, "SHT_GNU_VERSYM"},
 63 | //	{0x70000000, "SHT_LOPROC"},
 64 | //	{0x7000002a, "SHT_MIPS_ABIFLAGS"},
 65 | //	{0x7fffffff, "SHT_HIPROC"},
 66 | //	{0x80000000, "SHT_LOUSER"},
 67 | //	{0xffffffff, "SHT_HIUSER"},
 68 | //}
 69 | 
 70 | func SectionTypeName(i uint32) string {
 71 | 	return stringName(i, shtStrings, false)
 72 | }
 73 | 
 74 | //var shfStrings = []intName{
 75 | //	{0x1, "SHF_WRITE"},
 76 | //	{0x2, "SHF_ALLOC"},
 77 | //	{0x4, "SHF_EXECINSTR"},
 78 | //	{0x10, "SHF_MERGE"},
 79 | //	{0x20, "SHF_STRINGS"},
 80 | //	{0x40, "SHF_INFO_LINK"},
 81 | //	{0x80, "SHF_LINK_ORDER"},
 82 | //	{0x100, "SHF_OS_NONCONFORMING"},
 83 | //	{0x200, "SHF_GROUP"},
 84 | //	{0x400, "SHF_TLS"},
 85 | //	{0x800, "SHF_COMPRESSED"},
 86 | //}
 87 | 
 88 | func SectionFlagName(i uint64) string {
 89 | 	return stringName(uint32(i), shfStrings, false)
 90 | }
 91 | 
 92 | func StringTableName(bytes []byte, start uint32) string {
 93 | 	builder := strings.Builder{}
 94 | 	ch := bytes[start]
 95 | 	offset := start
 96 | 	for ch != 0 {
 97 | 		builder.WriteByte(ch)
 98 | 		offset += 1
 99 | 		ch = bytes[offset]
100 | 	}
101 | 	return builder.String()
102 | }
103 | 


--------------------------------------------------------------------------------
/internal/reader/reader.go:
--------------------------------------------------------------------------------
  1 | package reader
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"os"
  6 | 	"unicode/utf8"
  7 | )
  8 | 
  9 | type Reader struct {
 10 | 	filename       string // 文件名称
 11 | 	buff           []byte // 缓存池
 12 | 	ch             byte   // 主要记录换行符更新
 13 | 	chw            int    // 缓存字符宽度, 下一次读更新上一个字符宽度
 14 | 	b, r, e        int    // 读取器游标
 15 | 	line, col, off int    // 文件指针
 16 | }
 17 | 
 18 | func (r *Reader) errorf(format string, args ...any) {
 19 | 	panic(fmt.Errorf("Reader Error: %s\n\t->[%d, %d] %s",
 20 | 		fmt.Sprintf(format, args...), r.line+1, r.col+1, r.filename))
 21 | }
 22 | 
 23 | func (r *Reader) Pos() int {
 24 | 	return r.off
 25 | }
 26 | 
 27 | // GoBack 回退一个字符
 28 | func (r *Reader) GoBack() {
 29 | 	r.ch = 0
 30 | 	r.chw = 0
 31 | }
 32 | 
 33 | // ReadByte 返回值是否为 eof
 34 | func (r *Reader) ReadByte() (byte, bool) {
 35 | 	if r.chw > 0 { // 文件位置信息记录更新， 下一个字符开始 = 上一个字符结束 + 上一个字符宽度
 36 | 		r.r += r.chw
 37 | 		r.off += r.chw
 38 | 
 39 | 		if r.ch == '\n' {
 40 | 			r.col = 0
 41 | 			r.line += 1
 42 | 		} else {
 43 | 			r.col += 1 // utf8 字符占一列
 44 | 		}
 45 | 
 46 | 		r.chw = 0
 47 | 	}
 48 | 
 49 | 	// eof
 50 | 	if r.r == r.e {
 51 | 		r.ch = 0
 52 | 		r.chw = 0
 53 | 		return 0, true
 54 | 	}
 55 | 
 56 | 	r.ch = r.buff[r.r]
 57 | 	r.chw = 1
 58 | 	return r.ch, false
 59 | }
 60 | 
 61 | func (r *Reader) ReadRune() (rune, int) {
 62 | redo:
 63 | 	c, eof := r.ReadByte()
 64 | 	if eof {
 65 | 		return 0, 0
 66 | 	}
 67 | 
 68 | 	if c < utf8.RuneSelf {
 69 | 		return rune(c), 1
 70 | 	}
 71 | 
 72 | 	// 解码 UTF-8 字符
 73 | 	ch, chw := utf8.DecodeRune(r.buff[r.r:r.e])
 74 | 
 75 | 	r.chw = chw
 76 | 
 77 | 	// 检查解码错误
 78 | 	if ch == utf8.RuneError && chw == 1 { // 无效的 UTF-8 编码
 79 | 		r.errorf("invalid UTF-8 encoding at position %d", r.off-1)
 80 | 	}
 81 | 
 82 | 	const BOM = 0xfeff
 83 | 	if ch == BOM {
 84 | 		if r.off > 0 {
 85 | 			r.errorf("invalid BOM in the middle of the file")
 86 | 		}
 87 | 		goto redo // 忽略 BOM 字符
 88 | 	}
 89 | 
 90 | 	return ch, chw
 91 | }
 92 | 
 93 | // TextReady 文本读取器准备就绪
 94 | func (r *Reader) TextReady() {
 95 | 	r.b = r.r
 96 | }
 97 | 
 98 | // ReadText 读取一段本文
 99 | func (r *Reader) ReadText() string {
100 | 	defer func() {
101 | 		r.b = -1 // 重置游标
102 | 	}()
103 | 
104 | 	return string(r.buff[r.b : r.r+r.chw])
105 | }
106 | 
107 | // FileReader todo 蔚来可能扩展支持 多种数据源读取模式，比如数据流
108 | func FileReader(file string) *Reader {
109 | 	r := &Reader{filename: file}
110 | 
111 | 	buff, err := os.ReadFile(file)
112 | 	if err != nil {
113 | 		r.errorf("failed to read file: %s", err)
114 | 	}
115 | 
116 | 	r.buff = buff
117 | 	r.e = len(r.buff)
118 | 	return r
119 | }
120 | 
121 | func BytesReader(input []byte) *Reader {
122 | 	r := &Reader{filename: "#Bytes"}
123 | 	r.buff = input
124 | 	r.e = len(input)
125 | 	return r
126 | }
127 | 


--------------------------------------------------------------------------------
/internal/reader/decimal_test.go:
--------------------------------------------------------------------------------
 1 | package reader
 2 | 
 3 | import (
 4 | 	"github.com/facelang/face/compiler/compile/internal/tokens"
 5 | 	"testing"
 6 | )
 7 | 
 8 | func TestDecimal(t *testing.T) {
 9 | 	tests := []struct {
10 | 		name     string
11 | 		input    string
12 | 		wantTok  tokens.Token
13 | 		wantText string
14 | 		wantErr  bool
15 | 	}{
16 | 		// 十进制整数测试
17 | 		{"decimal integer", "123", tokens.INT, "123", false},
18 | 		{"decimal integer with underscore", "1_2_3", tokens.INT, "123", false},
19 | 		{"decimal zero", "0", tokens.INT, "0", false},
20 | 
21 | 		// 八进制测试
22 | 		{"octal with prefix 0", "0123", tokens.INT, "0123", false},
23 | 		{"octal with prefix o", "0o123", tokens.INT, "0o123", false},
24 | 		{"invalid octal float", "0o1.2", tokens.INT, "0o1", true},
25 | 
26 | 		// 十六进制测试
27 | 		{"hex with prefix x", "0x1A", tokens.INT, "0x1A", false},
28 | 		{"hex with prefix X", "0X1a", tokens.INT, "0X1a", false},
29 | 		{"hex float", "0x1.2p3", tokens.FLOAT, "0x1.2p3", false},
30 | 		{"hex float with capital P", "0x1.2P3", tokens.FLOAT, "0x1.2P3", false},
31 | 		{"hex float with negative exponent", "0x1.2p-3", tokens.FLOAT, "0x1.2p-3", false},
32 | 
33 | 		// 二进制测试
34 | 		{"binary with prefix b", "0b1010", tokens.INT, "0b1010", false},
35 | 		{"binary with prefix B", "0B1010", tokens.INT, "0B1010", false},
36 | 		{"invalid binary float", "0b1.01", tokens.INT, "0b1", true},
37 | 
38 | 		// 十进制浮点数测试
39 | 		{"decimal float", "123.456", tokens.FLOAT, "123.456", false},
40 | 		{"decimal float with exponent", "123.456e10", tokens.FLOAT, "123.456e10", false},
41 | 		{"decimal float with capital E", "123.456E10", tokens.FLOAT, "123.456E10", false},
42 | 		{"decimal float with negative exponent", "123.456e-10", tokens.FLOAT, "123.456e-10", false},
43 | 		{"decimal float with positive exponent", "123.456e+10", tokens.FLOAT, "123.456e+10", false},
44 | 
45 | 		// 错误情况测试
46 | 		{"invalid hex exponent", "0x1.2e3", tokens.FLOAT, "0x1", true},
47 | 		{"invalid decimal exponent", "0o1.2e3", tokens.FLOAT, "0o1", true},
48 | 		{"hex float without exponent", "0x1.2", tokens.FLOAT, "0x1", true},
49 | 		{"no digits", "0x", tokens.INT, "0x", true},
50 | 	}
51 | 
52 | 	for _, tt := range tests {
53 | 		t.Run(tt.name, func(t *testing.T) {
54 | 			// 创建 reader
55 | 			r := BytesReader([]byte(tt.input))
56 | 			first, _ := r.ReadByte()
57 | 
58 | 			defer func() {
59 | 				if r := recover(); r != nil {
60 | 					if !tt.wantErr {
61 | 						t.Errorf("Decimal() unexpected panic: %v", r)
62 | 					}
63 | 				}
64 | 			}()
65 | 
66 | 			gotTok, gotText := Decimal(r, first)
67 | 
68 | 			if !tt.wantErr {
69 | 				if gotTok != tt.wantTok {
70 | 					t.Errorf("Decimal() got token = %v, want %v", gotTok, tt.wantTok)
71 | 				}
72 | 				if gotText != tt.wantText {
73 | 					t.Errorf("Decimal() got text = %v, want %v", gotText, tt.wantText)
74 | 				}
75 | 			}
76 | 		})
77 | 	}
78 | }
79 | 


--------------------------------------------------------------------------------
/internal/os/elf/prog.go:
--------------------------------------------------------------------------------
  1 | package elf
  2 | 
  3 | import "encoding/binary"
  4 | 
  5 | // Block 表示一个数据块
  6 | type Block struct {
  7 | 	Data   []byte
  8 | 	Offset uint32
  9 | 	Size   uint32
 10 | }
 11 | 
 12 | // ProgSeg 表示段的列表, 还有两个方法： allocAddr, relocAddr
 13 | type ProgSeg struct {
 14 | 	Name      string   // 段名称
 15 | 	BaseAddr  uint32   // 分配基地址
 16 | 	Offset    uint32   // 合并后的文件偏移
 17 | 	Size      uint32   // 合并后大小
 18 | 	Begin     uint32   // 对齐前开始位置偏移
 19 | 	OwnerList []*File  // 拥有该段的文件序列
 20 | 	Blocks    []*Block // 记录合并后的数据块序列
 21 | }
 22 | 
// AllocAddr lays out the segment called name. base is the running virtual
// address and off the running file offset; both are read, aligned and then
// advanced past the segment. For every file in OwnerList the section's Addr
// is set to its final virtual address and, except for ".bss", its data is
// read and appended to Blocks.
func (s *ProgSeg) AllocAddr(name string, base *uint32, off *uint32) {
	s.Begin = *off // remember the offset before alignment

	// Align the virtual address to MemAlign (4 KiB according to the original
	// comment); ".bss" is not page-aligned.
	if name != ".bss" {
		*base += (MemAlign - *base%MemAlign) % MemAlign
	}

	// Align the file offset: DiscAlign for ordinary segments (4 bytes
	// according to the original comment), 16 bytes for ".text".
	align := uint32(DiscAlign)
	if name == ".text" {
		align = 16
	}
	*off += (align - *off%align) % align
	// Note that off and base have been aligned by different amounts here.

	// Make the virtual address and the file offset congruent modulo MemAlign.
	*base = *base - *base%MemAlign + *off%MemAlign // TODO (original author): this step is not fully understood

	// Record the segment start, then accumulate the size of every member.
	s.BaseAddr = *base
	s.Offset = *off
	s.Size = 0
	for _, file := range s.OwnerList { // merge the data of every file that owns this segment
		s.Size += (DiscAlign - s.Size%DiscAlign) % DiscAlign // align each member to DiscAlign; the data follows the padding
		seg := file.ShdrTab[name]
		// Read the data of the section being merged (".bss" has none in the file).
		if name != ".bss" {
			buf := file.ReadData(seg.Offset, seg.Size)
			block := &Block{
				Data:   buf,
				Offset: s.Size, // open question from the author: does it matter whether data sits before or after the padding?
				Size:   seg.Size,
			}
			s.Blocks = append(s.Blocks, block) // append to the chunk list
		}
		// Update the section's virtual address inside its own file.
		seg.Addr = *base + s.Size // used later to compute symbol and relocation addresses; the merged file offset is not stored
		s.Size += seg.Size        // accumulate the segment size
	}
	*base += s.Size // advance the running virtual address
	if name != ".bss" {
		*off += s.Size
	}
}
 69 | 
 70 | // RelocAddr 根据提供的重定位信息重定位地址
 71 | func (s *ProgSeg) RelocAddr(relAddr uint32, relocType uint8, symAddr uint32) {
 72 | 	relOffset := relAddr - s.BaseAddr //同类合并段的数据偏移
 73 | 
 74 | 	// 查找修正地址所在位置的数据块
 75 | 	var targetBlock *Block
 76 | 	for _, block := range s.Blocks {
 77 | 		if block.Offset <= relOffset && block.Offset+block.Size > relOffset {
 78 | 			targetBlock = block
 79 | 			break
 80 | 		}
 81 | 	}
 82 | 	if targetBlock == nil {
 83 | 		return
 84 | 	}
 85 | 
 86 | 	//处理字节为b->data[relOffset-b->offset]
 87 | 	// 获取需要修改的地址位置
 88 | 	offset := relOffset - targetBlock.Offset
 89 | 	if offset+4 > uint32(len(targetBlock.Data)) {
 90 | 		return
 91 | 	}
 92 | 
 93 | 	// 获取当前地址值
 94 | 	currentAddr := binary.LittleEndian.Uint32(targetBlock.Data[offset:])
 95 | 
 96 | 	// 根据重定位类型进行修正
 97 | 	switch relocType {
 98 | 	case uint8(R_386_32): // 绝对地址修正
 99 | 		binary.LittleEndian.PutUint32(targetBlock.Data[offset:], symAddr)
100 | 	case uint8(R_386_PC32): // 相对地址修正
101 | 		newAddr := symAddr - relAddr + currentAddr
102 | 		binary.LittleEndian.PutUint32(targetBlock.Data[offset:], newAddr)
103 | 	}
104 | }
105 | 


--------------------------------------------------------------------------------
/internal/os/elf/writer.go:
--------------------------------------------------------------------------------
  1 | package elf
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"encoding/binary"
  6 | 	"os"
  7 | )
  8 | 
  9 | type fileWriter struct {
 10 | 	name  string           // 文件名
 11 | 	w     *bytes.Buffer    // 文件输出
 12 | 	err   error            // 错误记录
 13 | 	order binary.ByteOrder // 读取器
 14 | }
 15 | 
 16 | type FileWriter = *fileWriter
 17 | 
 18 | func (f *fileWriter) Write(data any) error {
 19 | 	defer func() {
 20 | 		f.err = nil
 21 | 	}()
 22 | 	if f.err != nil {
 23 | 		return f.err
 24 | 	}
 25 | 	return binary.Write(f.w, f.order, data)
 26 | }
 27 | 
 28 | func (f *fileWriter) Flush() error {
 29 | 	if f.err != nil {
 30 | 		return f.err
 31 | 	}
 32 | 	w, err := os.Create(f.name) // 可以覆盖
 33 | 	if err != nil {
 34 | 		return err
 35 | 	}
 36 | 	_, err = w.Write(f.w.Bytes())
 37 | 	return err
 38 | }
 39 | 
 40 | func NewWriter(file string, order binary.ByteOrder) FileWriter {
 41 | 	//w, err := os.Create(file) // 可以覆盖
 42 | 	w, err := bytes.NewBuffer(nil), error(nil)
 43 | 	return &fileWriter{name: file, w: w, err: err, order: order}
 44 | }
 45 | 
 46 | // FileWrite 输出elf 文件
 47 | func FileWrite(file *File, target string) error {
 48 | 	w := NewWriter(target, file.Endian())
 49 | 	_ = w.Write(file.Ehdr) //elf文件头
 50 | 
 51 | 	// 可执行文件
 52 | 	if file.Ehdr.Type == Elf32_Half(ET_EXEC) {
 53 | 		//程序头表
 54 | 		for _, phdr := range file.PhdrTab {
 55 | 			_ = w.Write(phdr)
 56 | 		}
 57 | 		// 【数据段】最重要的部分
 58 | 		pad := [1]byte{0}
 59 | 		for _, seg := range file.ProgSegList {
 60 | 			padnum := seg.Offset - seg.Begin
 61 | 			for ; padnum != 0; padnum-- { //填充
 62 | 				_ = w.Write(pad)
 63 | 			}
 64 | 			if seg.Name == ".bss" {
 65 | 				continue
 66 | 			}
 67 | 			var oldBlock *Block = nil
 68 | 			instPad := [1]byte{0x90}
 69 | 			for i := 0; i < len(seg.Blocks); i++ {
 70 | 				b := seg.Blocks[i]
 71 | 				if oldBlock != nil {
 72 | 					padnum = b.Offset - (oldBlock.Offset + oldBlock.Size)
 73 | 					for ; padnum != 0; padnum-- { //填充
 74 | 						_ = w.Write(instPad)
 75 | 					}
 76 | 				}
 77 | 				oldBlock = b
 78 | 				_ = w.Write(b.Data)
 79 | 			}
 80 | 		}
 81 | 	} else {
 82 | 		// 【数据段】最重要的部分
 83 | 		var prev *ProgSeg = nil
 84 | 		pad := [1]byte{0}
 85 | 		for _, seg := range file.ProgSegList {
 86 | 			if prev != nil {
 87 | 				padnum := seg.Offset - (prev.Offset + prev.Size)
 88 | 				for ; padnum != 0; padnum-- { //填充
 89 | 					_ = w.Write(pad)
 90 | 				}
 91 | 			}
 92 | 			if seg.Name == ".bss" {
 93 | 				continue
 94 | 			}
 95 | 			for i := 0; i < len(seg.Blocks); i++ {
 96 | 				b := seg.Blocks[i]
 97 | 				_ = w.Write(b.Data)
 98 | 			}
 99 | 			prev = seg
100 | 		}
101 | 	}
102 | 
103 | 	// 最后写段表字符串
104 | 	_ = w.Write(file.Shstrtab)
105 | 
106 | 	// 段表
107 | 	for _, sh := range file.ShdrNames {
108 | 		_ = w.Write(file.ShdrTab[sh])
109 | 	}
110 | 
111 | 	// 符号表
112 | 	for _, sym := range file.SymNames {
113 | 		_ = w.Write(file.SymTab[sym])
114 | 	}
115 | 
116 | 	// 字符串表
117 | 	_ = w.Write(file.Strtab)
118 | 
119 | 	// 重定位表
120 | 	for _, rel := range file.RelTab {
121 | 		_ = w.Write(rel.Rel)
122 | 	}
123 | 
124 | 	return w.Flush() // 最后一部再写入文件
125 | }
126 | 


--------------------------------------------------------------------------------
/internal/prog/error.go:
--------------------------------------------------------------------------------
  1 | package prog
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"io"
  6 | 	"sort"
  7 | )
  8 | 
// Error is a single diagnostic: a message attached to a source position.
type Error struct {
	Pos     FilePos // position the message refers to
	Message string  // text of the diagnostic
}
 13 | 
 14 | // Error implements the error interface.
 15 | func (e Error) Error() string {
 16 | 	return fmt.Sprintf("%s:\n\t_>%s", e.Pos.String(), e.Message)
 17 | }
 18 | 
// ErrorList is a list of *Error values.
type ErrorList []*Error
 20 | 
 21 | // Add adds an [Error] with given position and error message to an [ErrorList].
 22 | func (p *ErrorList) Add(pos FilePos, msg string) {
 23 | 	*p = append(*p, &Error{pos, msg})
 24 | }
 25 | 
 26 | // Reset resets an [ErrorList] to no errors.
 27 | func (p *ErrorList) Reset() { *p = (*p)[0:0] }
 28 | 
 29 | // [ErrorList] implements the sort Interface.
 30 | func (p ErrorList) Len() int      { return len(p) }
 31 | func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
 32 | 
 33 | func (p ErrorList) Less(i, j int) bool {
 34 | 	e := &p[i].Pos
 35 | 	f := &p[j].Pos
 36 | 	// Note that it is not sufficient to simply compare file offsets because
 37 | 	// the offsets do not reflect modified line information (through //line
 38 | 	// comments).
 39 | 	if e.Filename != f.Filename {
 40 | 		return e.Filename < f.Filename
 41 | 	}
 42 | 	if e.Line != f.Line {
 43 | 		return e.Line < f.Line
 44 | 	}
 45 | 	if e.Col != f.Col {
 46 | 		return e.Col < f.Col
 47 | 	}
 48 | 	return p[i].Message < p[j].Message
 49 | }
 50 | 
 51 | // Sort sorts an [ErrorList]. *[Error] entries are sorted by position,
 52 | // other errors are sorted by error message, and before any *[Error]
 53 | // entry.
 54 | func (p ErrorList) Sort() {
 55 | 	sort.Sort(p)
 56 | }
 57 | 
 58 | // RemoveMultiples sorts an [ErrorList] and removes all but the first error per line.
 59 | func (p *ErrorList) RemoveMultiples() {
 60 | 	sort.Sort(p)
 61 | 	var last FilePos // initial last.Line is != any legal error line
 62 | 	i := 0
 63 | 	for _, e := range *p {
 64 | 		if e.Pos.Filename != last.Filename || e.Pos.Line != last.Line {
 65 | 			last = e.Pos
 66 | 			(*p)[i] = e
 67 | 			i++
 68 | 		}
 69 | 	}
 70 | 	*p = (*p)[0:i]
 71 | }
 72 | 
 73 | // An [ErrorList] implements the error interface.
 74 | func (p ErrorList) Error() string {
 75 | 	switch len(p) {
 76 | 	case 0:
 77 | 		return "no errors"
 78 | 	case 1:
 79 | 		return p[0].Error()
 80 | 	}
 81 | 	return fmt.Sprintf("%s (and %d more errors)", p[0], len(p)-1)
 82 | }
 83 | 
 84 | // Err returns an error equivalent to this error list.
 85 | // If the list is empty, Err returns nil.
 86 | func (p ErrorList) Err() error {
 87 | 	if len(p) == 0 {
 88 | 		return nil
 89 | 	}
 90 | 	return p
 91 | }
 92 | 
 93 | // PrintError is a utility function that prints a list of errors to w,
 94 | // one error per line, if the err parameter is an [ErrorList]. Otherwise
 95 | // it prints the err string.
 96 | func PrintError(w io.Writer, err error) {
 97 | 	if list, ok := err.(ErrorList); ok {
 98 | 		for _, e := range list {
 99 | 			fmt.Fprintf(w, "%s\n", e)
100 | 		}
101 | 	} else if err != nil {
102 | 		fmt.Fprintf(w, "%s\n", err)
103 | 	}
104 | }
105 | 
// ErrorFunc is the signature of a callback that receives a position and a
// message.
type ErrorFunc func(info *FilePos, msg string)
107 | 


--------------------------------------------------------------------------------
/internal/reader/reader_test.go:
--------------------------------------------------------------------------------
  1 | package reader
  2 | 
  3 | import (
  4 | 	"os"
  5 | 	"testing"
  6 | 	"unicode/utf8"
  7 | )
  8 | 
  9 | func TestReadRune(t *testing.T) {
 10 | 	// 创建测试文件，包含各种 UTF-8 字符
 11 | 	testContent := "Hello 世界\n你好 🌍\nTest 测试"
 12 | 	testFile := "test_utf8.txt"
 13 | 
 14 | 	// 写入测试文件
 15 | 	err := os.WriteFile(testFile, []byte(testContent), 0644)
 16 | 	if err != nil {
 17 | 		t.Fatalf("Failed to create test file: %v", err)
 18 | 	}
 19 | 	defer os.Remove(testFile)
 20 | 
 21 | 	// 创建 Reader
 22 | 	reader := FileReader(testFile)
 23 | 
 24 | 	// 读取所有字符并验证
 25 | 	var result []rune
 26 | 	for {
 27 | 		ch, chw := reader.ReadRune()
 28 | 		if chw == 0 {
 29 | 			break
 30 | 		}
 31 | 		t.Logf("Read rune: %c (%U)", ch, ch)
 32 | 		result = append(result, ch)
 33 | 	}
 34 | 
 35 | 	// 验证读取的字符是否正确
 36 | 	expected := []rune(testContent)
 37 | 	t.Logf("Got content: %q", result)
 38 | 	t.Logf("Expected length: %d, Got length: %d", len(expected), len(result))
 39 | 
 40 | 	if len(result) != len(expected) {
 41 | 		t.Fatalf("Length mismatch: got %d, expected %d", len(result), len(expected))
 42 | 	}
 43 | 
 44 | 	for i, r := range result {
 45 | 		if r != expected[i] {
 46 | 			t.Errorf("Character mismatch at position %d: got %c (%U), expected %c (%U)",
 47 | 				i, r, r, expected[i], expected[i])
 48 | 		}
 49 | 	}
 50 | }
 51 | 
 52 | func TestReadRuneASCII(t *testing.T) {
 53 | 	// 测试纯 ASCII 字符
 54 | 	testContent := "Hello World\nTest 123"
 55 | 	testFile := "test_ascii.txt"
 56 | 
 57 | 	err := os.WriteFile(testFile, []byte(testContent), 0644)
 58 | 	if err != nil {
 59 | 		t.Fatalf("Failed to create test file: %v", err)
 60 | 	}
 61 | 	defer os.Remove(testFile)
 62 | 
 63 | 	reader := FileReader(testFile)
 64 | 
 65 | 	var result []rune
 66 | 	for {
 67 | 		r, chw := reader.ReadRune()
 68 | 		if chw == 0 {
 69 | 			break
 70 | 		}
 71 | 		result = append(result, r)
 72 | 	}
 73 | 
 74 | 	expected := []rune(testContent)
 75 | 	if len(result) != len(expected) {
 76 | 		t.Fatalf("Length mismatch: got %d, expected %d", len(result), len(expected))
 77 | 	}
 78 | 
 79 | 	for i, r := range result {
 80 | 		if r != expected[i] {
 81 | 			t.Errorf("Character mismatch at position %d: got %c, expected %c", i, r, expected[i])
 82 | 		}
 83 | 	}
 84 | }
 85 | 
 86 | func TestReadRuneEmoji(t *testing.T) {
 87 | 	// 测试包含 emoji 的文本
 88 | 	testContent := "Hello 🌍 World 🚀"
 89 | 	testFile := "test_emoji.txt"
 90 | 
 91 | 	err := os.WriteFile(testFile, []byte(testContent), 0644)
 92 | 	if err != nil {
 93 | 		t.Fatalf("Failed to create test file: %v", err)
 94 | 	}
 95 | 	defer os.Remove(testFile)
 96 | 
 97 | 	reader := FileReader(testFile)
 98 | 
 99 | 	var result []rune
100 | 	for {
101 | 		r, chw := reader.ReadRune()
102 | 		if chw == 0 {
103 | 			break
104 | 		}
105 | 		result = append(result, r)
106 | 	}
107 | 
108 | 	expected := []rune(testContent)
109 | 	if len(result) != len(expected) {
110 | 		t.Fatalf("Length mismatch: got %d, expected %d", len(result), len(expected))
111 | 	}
112 | 
113 | 	for i, r := range result {
114 | 		if r != expected[i] {
115 | 			t.Errorf("Character mismatch at position %d: got %c (%U), expected %c (%U)",
116 | 				i, r, r, expected[i], expected[i])
117 | 		}
118 | 	}
119 | }
120 | 
121 | func TestReadRuneInvalidUTF8(t *testing.T) {
122 | 	// 测试无效的 UTF-8 序列
123 | 	invalidUTF8 := []byte{0xFF, 0xFE, 0xFD} // 无效的 UTF-8 字节序列
124 | 	testFile := "test_invalid.txt"
125 | 
126 | 	err := os.WriteFile(testFile, invalidUTF8, 0644)
127 | 	if err != nil {
128 | 		t.Fatalf("Failed to create test file: %v", err)
129 | 	}
130 | 	defer os.Remove(testFile)
131 | 
132 | 	reader := FileReader(testFile)
133 | 
134 | 	// 读取第一个字符，应该返回 RuneError
135 | 	r, chw := reader.ReadRune()
136 | 	if r != utf8.RuneError {
137 | 		t.Errorf("Expected RuneError for invalid UTF-8, got %c (%U)", r, r)
138 | 	}
139 | 	if chw != 1 {
140 | 		t.Error("Expected width 1 for invalid UTF-8")
141 | 	}
142 | }
143 | 


--------------------------------------------------------------------------------
/internal/reader/decimal.go:
--------------------------------------------------------------------------------
  1 | package reader
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"strings"
  6 | )
  7 | 
// NumberType classifies a numeric literal scanned by Number.
type NumberType int8

// INT_TYPE marks an integer literal.
const INT_TYPE NumberType = 1

// FLOAT_TYPE marks a floating-point literal.
const FLOAT_TYPE NumberType = 2
 12 | 
 13 | // Number 这是一个数字的解析器, 读取结束，最后一个字符不是有效数字， 可能是其它符号，所以需要退回最后一个
 14 | func Number(r *Reader, first rune) (NumberType, string) {
 15 | 	defer func() {
 16 | 		r.GoBack() // 最后一个符号需要回退
 17 | 	}()
 18 | 
 19 | 	base := 10        // 数字基数
 20 | 	prefix := byte(0) // 前缀：0(十进制), '0'(八进制), 'x'(十六进制), 'o'(八进制), 'b'(二进制)
 21 | 	flags := byte(0)  // 位标志：bit 0: 有数字, bit 1: 有下划线, bit 2 符号异常
 22 | 
 23 | 	// 整数部分
 24 | 	ds := byte(0)
 25 | 	ch := byte(first)
 26 | 	tok := INT_TYPE
 27 | 
 28 | 	if first == '0' {
 29 | 		ch, _ = r.ReadByte()
 30 | 		switch ch {
 31 | 		case '.': // 小数
 32 | 			tok = FLOAT_TYPE
 33 | 		case 'x', 'X':
 34 | 			ch, _ = r.ReadByte()
 35 | 			base, prefix = 16, 'x'
 36 | 		case 'o', 'O':
 37 | 			ch, _ = r.ReadByte()
 38 | 			base, prefix = 8, 'o'
 39 | 		case 'b', 'B':
 40 | 			ch, _ = r.ReadByte()
 41 | 			base, prefix = 2, 'b'
 42 | 		default:
 43 | 			base, prefix = 8, '0'
 44 | 			flags = 1 // 前导0, 或者 只为 0
 45 | 		}
 46 | 	} else if first == '.' {
 47 | 		tok = FLOAT_TYPE
 48 | 	} else {
 49 | 		flags = 1 // 前导数
 50 | 	}
 51 | 
 52 | 	// 整数和16进制支持小数表达， 先读取整数部分
 53 | 	// 123.456 和 0x1.2p3 都是合法的
 54 | 	if tok == INT_TYPE || prefix != 'x' {
 55 | 		ch, ds = digits(r, ch, base) // 解析所有数字和下划线
 56 | 		flags |= ds                  // ds 的值为 01 表示有数字，10 表示有下划线
 57 | 		if ch == '.' {
 58 | 			if flags&1 == 0 { // 0x. 是非法的
 59 | 				panic(fmt.Errorf("%s has no digits", decimalName(prefix)))
 60 | 			}
 61 | 			tok = FLOAT_TYPE
 62 | 		}
 63 | 	}
 64 | 
 65 | 	// 非十进制，或者小数 （小数点后的数字或其它进制）
 66 | 	if tok == FLOAT_TYPE || prefix != 0 {
 67 | 		ch, ds = digits(r, ch, base) // 解析所有数字和下划线
 68 | 		flags |= ds                  // ds 的值为 01 表示有数字，10 表示有下划线
 69 | 		if flags&1 == 0 {            // 没有读取到数字
 70 | 			panic(fmt.Errorf("%s has no digits", decimalName(prefix)))
 71 | 		}
 72 | 	}
 73 | 
 74 | 	// 指数部分（e/E 用于十进制，p/P 用于十六进制）
 75 | 	if e := ch; e == 'e' || e == 'E' || e == 'p' || e == 'P' {
 76 | 		if (e == 'e' || e == 'E') && prefix != 0 {
 77 | 			panic(fmt.Errorf("%q exponent requires decimal mantissa", ch))
 78 | 		}
 79 | 		if (e == 'p' || e == 'P') && prefix != 'x' {
 80 | 			panic(fmt.Errorf("%q exponent requires hexadecimal mantissa", ch))
 81 | 		}
 82 | 
 83 | 		ch, _ = r.ReadByte()
 84 | 		tok = FLOAT_TYPE
 85 | 		if ch == '+' || ch == '-' {
 86 | 			ch, _ = r.ReadByte()
 87 | 		}
 88 | 
 89 | 		_, ds = digits(r, ch, 10) // 指数后面的值， 只能十进制
 90 | 		flags |= ds
 91 | 
 92 | 		if ds&1 == 0 { // 指数后面没有数字
 93 | 			panic(fmt.Errorf("exponent has no digits"))
 94 | 		}
 95 | 	}
 96 | 
 97 | 	if flags&2 == 0 {
 98 | 		return tok, r.ReadText()
 99 | 	}
100 | 
101 | 	// 数字中有 _ 需要踢掉
102 | 	return tok, strings.ReplaceAll(r.ReadText(), "_", "")
103 | }
104 | 
105 | // 辅助函数：解析数字序列
106 | func digits(r *Reader, ch byte, base int) (byte, byte) {
107 | 	ds := byte(0) // 位标志：bit 0: 有数字, bit 1: 有下划线 bit 3: 异常
108 | 	for {
109 | 		if ch == '.' { // 不是小数点，直接跳出循环
110 | 			break
111 | 		}
112 | 		if ch == '_' {
113 | 			ds |= 2 // 记录下划线
114 | 			ch, _ = r.ReadByte()
115 | 			continue
116 | 		}
117 | 		d := digitVal(ch) // 获取字符的数值
118 | 		if d >= base {    // 如果数值大于等于基数
119 | 			ds |= 4 // 记录异常
120 | 			break   // 跳出循环
121 | 		}
122 | 		ds |= 1              // 记录数字
123 | 		ch, _ = r.ReadByte() // 读取下一个字符
124 | 	}
125 | 	return ch, ds
126 | }
127 | 
// digitVal returns the numeric value of ch as a digit: 0-9 for '0'-'9' and
// 10-35 for the letters a-z in either case. Every other byte yields 36,
// which is larger than any valid digit.
func digitVal(ch byte) int {
	if ch >= '0' && ch <= '9' {
		return int(ch - '0')
	}
	if ch >= 'a' && ch <= 'z' {
		return int(ch-'a') + 10
	}
	if ch >= 'A' && ch <= 'Z' {
		return int(ch-'A') + 10
	}
	return 36
}
140 | 
// decimalName returns the name of the literal kind selected by prefix, for
// use in error messages: 'x' is hexadecimal, 'o' and '0' are octal, 'b' is
// binary and anything else is decimal.
func decimalName(prefix byte) string {
	switch prefix {
	case 'x':
		return "hexadecimal"
	case 'o', '0':
		return "octal"
	case 'b':
		return "binary"
	}
	return "decimal"
}
156 | 


--------------------------------------------------------------------------------
/compiler/assemble/internal/lexer.go:
--------------------------------------------------------------------------------
  1 | package internal
  2 | 
  3 | import (
  4 | 	"github.com/facelang/face/internal/reader"
  5 | 	"unicode"
  6 | 	"unicode/utf8"
  7 | )
  8 | 
// Whitespace is a bitmask with one bit set for each character the lexer
// skips between tokens: tab, newline, carriage return and space. A bitmask
// test was chosen because it is cheaper than a map lookup or a switch.
const Whitespace = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' '
 11 | 
 12 | //type lexer struct {
 13 | //	reader *reader.Reader
 14 | //	token  tokens2.Token
 15 | //	ident  string
 16 | //}
 17 | //
 18 | //func (lex *lexer) NextToken() tokens2.Token {
 19 | //	ch, chw := lex.reader.ReadRune()
 20 | //	if chw == 0 {
 21 | //		return tokens2.EOF
 22 | //	}
 23 | //
 24 | //	// skip white space
 25 | //	for Whitespace&(1<<ch) != 0 {
 26 | //		ch, chw = lex.reader.ReadRune()
 27 | //	}
 28 | //
 29 | //	if chw == 0 {
 30 | //		return tokens2.EOF
 31 | //	}
 32 | //
 33 | //	lex.ident = ""
 34 | //
 35 | //	// start collecting token text
 36 | //	lex.reader.TextReady()
 37 | //
 38 | //	if '0' <= ch && ch <= '9' { // 数字
 39 | //		return GetDecimal(lex, ch)
 40 | //	}
 41 | //
 42 | //	if CheckIdent(ch, 0) { // 符号
 43 | //		for i := 1; CheckIdent(ch, i); i++ {
 44 | //			ch, chw = lex.reader.ReadRune()
 45 | //		}
 46 | //		lex.ident = lex.reader.ReadText()
 47 | //		return tokens2.IDENT
 48 | //	}
 49 | //
 50 | //	// determine token value
 51 | //	switch ch {
 52 | //	case '"':
 53 | //		ident, _ := reader.String(lex.reader, '"')
 54 | //		lex.ident = ident
 55 | //		return tokens2.STRING
 56 | //	case '\'':
 57 | //		lex.ident = reader.Char(lex.reader)
 58 | //		return tokens.CHAR
 59 | //	case '`':
 60 | //		lex.ident = reader.RawString(lex.reader)
 61 | //		return tokens2.STRING
 62 | //	case ';': // todo at&t 语法使用 # 作为注解
 63 | //		lex.ident = reader.Comment(lex.reader)
 64 | //		return tokens2.COMMENT
 65 | //	default:
 66 | //		return tokens2.Token(ch)
 67 | //	}
 68 | //}
 69 | //
 70 | //func CheckIdent(ch rune, i int) bool {
 71 | //	return ch == '.' || ch == '_' || unicode.IsLetter(ch) ||
 72 | //		unicode.IsDigit(ch) && i > 0 // 第一个字符必须是字母或下划线
 73 | //}
 74 | //
 75 | //func GetDecimal(lex *lexer, ch rune) tokens2.Token {
 76 | //	token, val := reader.Decimal(lex.reader, ch)
 77 | //	lex.ident = val
 78 | //	return token
 79 | //}
 80 | 
// lexer turns the characters delivered by the embedded reader into tokens.
type lexer struct {
	*reader.Reader        // character source
	id             string // text of the most recent identifier, number, string or comment
	pos            int    // value of Reader.Pos() recorded by NextToken
	back           bool   // true while a pushed-back token is pending
	backToken      Token  // the pushed-back token
}
 88 | 
 89 | func (lex *lexer) Back(token Token) {
 90 | 	lex.back = true
 91 | 	lex.backToken = token
 92 | }
 93 | 
 94 | func (lex *lexer) NextToken() Token {
 95 | 	defer func() {
 96 | 		lex.back = false
 97 | 	}()
 98 | 
 99 | 	// 如果有回退，先获取回退
100 | 	if lex.back {
101 | 		return lex.backToken
102 | 	}
103 | 
104 | 	ch, chw := lex.ReadRune()
105 | 	if chw == 0 {
106 | 		return EOF
107 | 	}
108 | 
109 | 	lex.pos = lex.Pos()
110 | 
111 | 	// skip white space
112 | 	for Whitespace&(1<<ch) != 0 {
113 | 		ch, chw = lex.ReadRune()
114 | 	}
115 | 
116 | 	if chw == 0 {
117 | 		return EOF
118 | 	}
119 | 
120 | 	lex.id = ""
121 | 
122 | 	lex.TextReady()
123 | 
124 | 	if '0' <= ch && ch <= '9' { // 数字
125 | 		return Number(lex, ch)
126 | 	}
127 | 
128 | 	if CheckIdent(ch, 0) { // 符号
129 | 		for i := 1; CheckIdent(ch, i); i++ {
130 | 			ch, chw = lex.ReadRune()
131 | 		}
132 | 		lex.id = lex.ReadText()
133 | 		return Lookup(lex.id)
134 | 	}
135 | 
136 | 	switch ch {
137 | 	case '+':
138 | 		return ADD
139 | 	case '-':
140 | 		return SUB
141 | 	case ':':
142 | 		return COLON
143 | 	case ',':
144 | 		return COMMA
145 | 	case ';':
146 | 		lex.id = reader.Comment(lex.Reader)
147 | 		return COMMENT
148 | 	case '"': // 查找字符串，到 " 结束
149 | 		lex.id, _ = reader.String(lex.Reader, '"')
150 | 		return STRING
151 | 	case '[':
152 | 		return LBRACK
153 | 	case ']':
154 | 		return RBRACK
155 | 	default:
156 | 		return ILLEGAL
157 | 	}
158 | }
159 | 
// CheckIdent reports whether ch may appear at index i of an identifier.
// '.', '_', '@', any letter and any rune above utf8.RuneSelf are accepted at
// every position; digits are accepted only when i > 0.
func CheckIdent(ch rune, i int) bool {
	switch ch {
	case '.', '_', '@':
		return true
	}
	if unicode.IsLetter(ch) || ch > utf8.RuneSelf {
		return true
	}
	// A digit cannot start an identifier.
	return i > 0 && unicode.IsDigit(ch)
}
164 | 
165 | func Number(lex *lexer, ch rune) Token {
166 | 	typ, val := reader.Number(lex.Reader, ch)
167 | 	lex.id = val
168 | 
169 | 	if typ == reader.INT_TYPE {
170 | 		return INT
171 | 	}
172 | 	return FLOAT
173 | }
174 | 
175 | func NewLexer(file string) *lexer { // 封装后的读取器
176 | 	return &lexer{Reader: reader.FileReader(file)}
177 | }
178 | 


--------------------------------------------------------------------------------
/compiler/compile/parser/lexer.go:
--------------------------------------------------------------------------------
  1 | package parser
  2 | 
  3 | import (
  4 | 	"github.com/facelang/face/compiler/compile/token"
  5 | 	"github.com/facelang/face/internal/reader"
  6 | 	"unicode"
  7 | 	"unicode/utf8"
  8 | )
  9 | 
// Whitespace is a bitmask with one bit set for each character the lexer
// skips between tokens: tab, carriage return and space. Newline is not part
// of the mask. A bitmask test was chosen because it is cheaper than a map
// lookup or a switch.
const Whitespace = 1<<'\t' | 1<<'\r' | 1<<' '
 12 | 
// lexer turns the characters delivered by the embedded reader into tokens.
type lexer struct {
	*reader.Reader           // character source
	pos            token.Pos // value of Reader.Pos() recorded by NextToken
	identifier     string    // text of the most recent identifier, number, string, char or comment
}
 18 | 
 19 | //type lexer struct {
 20 | //	buffer            *buffer       // 读取器
 21 | //	content           string        // 暂存字符
 22 | //	col, line, offset int           // 文件读取指针行列号
 23 | //	back              bool          // 回退标识
 24 | //	backToken         Token // 回退Token
 25 | //}
 26 | 
 27 | //func (l *lexer) init(file string, errFunc ErrorFunc) error {
 28 | //	defer func() { next, _ := lex.ReadByte() }()
 29 | //	return l.buffer.init(file, errFunc)
 30 | //}
 31 | //
 32 | //func (l *lexer) Back(token Token) {
 33 | //	l.back = true
 34 | //	l.backToken = token
 35 | //}
 36 | 
 37 | // NextToken todo 需要处理分号，和换行符， 还需要处理：分支语句中，必须是分号，其它情况可以是换行符或者分号
 38 | func (lex *lexer) NextToken() token.Token {
 39 | 	//defer func() {
 40 | 	//	l.back = false
 41 | 	//}()
 42 | 	//
 43 | 	//// 如果有回退，先获取回退
 44 | 	//if l.back {
 45 | 	//	return l.backToken
 46 | 	//}
 47 | 
 48 | 	ch, chw := lex.ReadRune()
 49 | 	if chw == 0 {
 50 | 		return token.EOF
 51 | 	}
 52 | 
 53 | 	lex.pos = token.Pos(lex.Pos())
 54 | 
 55 | 	// skip white space
 56 | 	for Whitespace&(1<<ch) != 0 {
 57 | 		ch, chw = lex.ReadRune()
 58 | 	}
 59 | 
 60 | 	if chw == 0 {
 61 | 		return token.EOF
 62 | 	}
 63 | 
 64 | 	lex.identifier = ""
 65 | 
 66 | 	// start collecting token text
 67 | 	lex.TextReady()
 68 | 
 69 | 	if '0' <= ch && ch <= '9' { // 数字
 70 | 		return Number(lex, ch)
 71 | 	}
 72 | 
 73 | 	if CheckIdent(ch, 0) { // 符号
 74 | 		for i := 1; CheckIdent(ch, i); i++ {
 75 | 			ch, chw = lex.ReadRune()
 76 | 		}
 77 | 		lex.identifier = lex.ReadText()
 78 | 		return token.Lookup(lex.identifier)
 79 | 	}
 80 | 
 81 | 	switch ch {
 82 | 	case '\n':
 83 | 		return token.NEWLINE
 84 | 	case '+':
 85 | 		return token.ADD
 86 | 	case '-':
 87 | 		return token.SUB
 88 | 	case '*':
 89 | 		return token.MUL
 90 | 	case '/':
 91 | 		next, _ := lex.ReadByte()
 92 | 		if next == '/' {
 93 | 			lex.identifier = reader.Comment(lex.Reader)
 94 | 			return token.COMMENT
 95 | 		}
 96 | 		lex.GoBack()
 97 | 		return token.QUO
 98 | 	case '>':
 99 | 		next, _ := lex.ReadByte()
100 | 		if next == '=' {
101 | 			return token.GEQ
102 | 		} else if next == '>' {
103 | 			return token.SHR
104 | 		} else {
105 | 			lex.GoBack()
106 | 			return token.GTR
107 | 		}
108 | 	case '<':
109 | 		next, _ := lex.ReadByte()
110 | 		if next == '=' {
111 | 			return token.LEQ
112 | 		} else if next == '>' {
113 | 			return token.SHL
114 | 		} else {
115 | 			lex.GoBack()
116 | 			return token.LSS
117 | 		}
118 | 	case '=':
119 | 		next, _ := lex.ReadByte()
120 | 		if next == '=' {
121 | 			return token.EQL
122 | 		}
123 | 		lex.GoBack()
124 | 		return token.ASSIGN
125 | 	case '!':
126 | 		next, _ := lex.ReadByte()
127 | 		if next == '=' {
128 | 			return token.NEQ
129 | 		}
130 | 		lex.GoBack()
131 | 		return token.NOT
132 | 	case ';':
133 | 		return token.SEMICOLON
134 | 	case ',':
135 | 		return token.COMMA
136 | 	case '"': // 查找字符串，到 " 结束, 最后一个字符是 ", 所以不需要回退
137 | 		ident, _ := reader.String(lex.Reader, '"')
138 | 		lex.identifier = ident
139 | 		return token.STRING
140 | 	case '\'': // 读一个字符, 字符串读， \' 结尾， 不需要回退
141 | 		lex.identifier = reader.Char(lex.Reader)
142 | 		return token.CHAR
143 | 	case '`': // todo 多行文本，需要进一步处理为一般字符串
144 | 		lex.identifier = reader.RawString(lex.Reader)
145 | 		return token.STRING
146 | 	case '(':
147 | 		return token.LPAREN
148 | 	case ')':
149 | 		return token.RPAREN
150 | 	case '{':
151 | 		return token.LBRACE
152 | 	case '}':
153 | 		return token.RBRACE
154 | 	default:
155 | 		return token.ILLEGAL
156 | 	}
157 | }
158 | 
// CheckIdent reports whether ch may appear at index i of an identifier.
// '.', '_', any letter and any rune above utf8.RuneSelf are accepted at
// every position; digits are accepted only when i > 0.
func CheckIdent(ch rune, i int) bool {
	switch ch {
	case '.', '_':
		return true
	}
	if unicode.IsLetter(ch) || ch > utf8.RuneSelf {
		return true
	}
	// A digit cannot start an identifier.
	return i > 0 && unicode.IsDigit(ch)
}
163 | 
164 | func Number(lex *lexer, ch rune) token.Token {
165 | 	typ, val := reader.Number(lex.Reader, ch)
166 | 	lex.identifier = val
167 | 
168 | 	if typ == reader.INT_TYPE {
169 | 		return token.INT
170 | 	}
171 | 	return token.FLOAT
172 | }
173 | 
174 | func NewLexer(file string) *lexer { // 封装后的读取器
175 | 	return &lexer{Reader: reader.FileReader(file)}
176 | }
177 | 


--------------------------------------------------------------------------------
/compiler/compile/ast/scope.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2009 The Go Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | // This file implements scopes and the objects they contain.
  6 | 
  7 | package ast
  8 | 
  9 | import (
 10 | 	"fmt"
 11 | 	"go/token"
 12 | 	"strings"
 13 | )
 14 | 
// A Scope maintains the set of named language entities declared
// in the scope and a link to the immediately surrounding (outer)
// scope.
//
// Deprecated: use the type checker [go/types] instead; see [Object].
//
// NOTE(review): the deprecation notice looks inherited from the Go standard
// library's go/ast package — confirm whether it applies to this project.
type Scope struct {
	Outer   *Scope             // immediately surrounding scope
	Objects map[string]*Object // objects declared in this scope, keyed by name
}
 24 | 
 25 | // NewScope creates a new scope nested in the outer scope.
 26 | func NewScope(outer *Scope) *Scope {
 27 | 	const n = 4 // initial scope capacity
 28 | 	return &Scope{outer, make(map[string]*Object, n)}
 29 | }
 30 | 
 31 | // Lookup returns the object with the given name if it is
 32 | // found in scope s, otherwise it returns nil. Outer scopes
 33 | // are ignored.
 34 | func (s *Scope) Lookup(name string) *Object {
 35 | 	return s.Objects[name]
 36 | }
 37 | 
 38 | // Insert attempts to insert a named object obj into the scope s.
 39 | // If the scope already contains an object alt with the same name,
 40 | // Insert leaves the scope unchanged and returns alt. Otherwise
 41 | // it inserts obj and returns nil.
 42 | func (s *Scope) Insert(obj *Object) (alt *Object) {
 43 | 	if alt = s.Objects[obj.Name]; alt == nil {
 44 | 		s.Objects[obj.Name] = obj
 45 | 	}
 46 | 	return
 47 | }
 48 | 
 49 | // Debugging support
 50 | func (s *Scope) String() string {
 51 | 	var buf strings.Builder
 52 | 	fmt.Fprintf(&buf, "scope %p {", s)
 53 | 	if s != nil && len(s.Objects) > 0 {
 54 | 		fmt.Fprintln(&buf)
 55 | 		for _, obj := range s.Objects {
 56 | 			fmt.Fprintf(&buf, "\t%s %s\n", obj.Kind, obj.Name)
 57 | 		}
 58 | 	}
 59 | 	fmt.Fprintf(&buf, "}\n")
 60 | 	return buf.String()
 61 | }
 62 | 
 63 | // ----------------------------------------------------------------------------
 64 | // Objects
 65 | 
// An Object describes a named language entity such as a package,
// constant, type, variable, function (incl. methods), or label.
//
// The Data fields contains object-specific data:
//
//	Kind    Data type         Data value
//	Pkg     *Scope            package scope
//	Con     int               iota for the respective declaration
//
// Deprecated: The relationship between Idents and Objects cannot be
// correctly computed without type information. For example, the
// expression T{K: 0} may denote a struct, map, slice, or array
// literal, depending on the type of T. If T is a struct, then K
// refers to a field of T, whereas for the other types it refers to a
// value in the environment.
//
// New programs should set the [parser.SkipObjectResolution] parser
// flag to disable syntactic object resolution (which also saves CPU
// and memory), and instead use the type checker [go/types] if object
// resolution is desired. See the Defs, Uses, and Implicits fields of
// the [types.Info] struct for details.
//
// NOTE(review): the deprecation text looks inherited from the Go standard
// library's go/ast package — confirm whether it applies to this project.
type Object struct {
	Kind ObjKind // what kind of entity this object is
	Name string  // declared name
	Decl any     // corresponding Field, XxxSpec, FuncDecl, LabeledStmt, AssignStmt, Scope; or nil
	Data any     // object-specific data; or nil
	Type any     // placeholder for type information; may be nil
}
 94 | 
 95 | // NewObj creates a new object of a given kind and name.
 96 | func NewObj(kind ObjKind, name string) *Object {
 97 | 	return &Object{Kind: kind, Name: name}
 98 | }
 99 | 
100 | // Ofsset computes the source position of the declaration of an object name.
101 | // The result is token.NoPos if the position cannot be computed
102 | // (obj.Decl may be nil or not correct).
103 | func (obj *Object) Ofsset() token.Pos {
104 | 	name := obj.Name
105 | 	switch d := obj.Decl.(type) {
106 | 	case *Field: // first field name equal to obj.Name
107 | 		for _, n := range d.Names {
108 | 			if n.Name == name {
109 | 				return n.Ofsset()
110 | 			}
111 | 		}
112 | 	case *ImportSpec: // the local name if it matches, otherwise the import path
113 | 		if d.Name != nil && d.Name.Name == name {
114 | 			return d.Name.Ofsset()
115 | 		}
116 | 		return d.Path.Ofsset()
117 | 	case *ValueSpec: // first declared name equal to obj.Name
118 | 		for _, n := range d.Names {
119 | 			if n.Name == name {
120 | 				return n.Ofsset()
121 | 			}
122 | 		}
123 | 	case *TypeSpec:
124 | 		if d.Name.Name == name {
125 | 			return d.Name.Ofsset()
126 | 		}
127 | 	case *FuncDecl:
128 | 		if d.Name.Name == name {
129 | 			return d.Name.Ofsset()
130 | 		}
131 | 	case *LabeledStmt:
132 | 		if d.Label.Name == name {
133 | 			return d.Label.Ofsset()
134 | 		}
135 | 	case *AssignStmt: // only plain identifiers on the left-hand side are considered
136 | 		for _, x := range d.Lhs {
137 | 			if ident, isIdent := x.(*Ident); isIdent && ident.Name == name {
138 | 				return ident.Ofsset()
139 | 			}
140 | 		}
141 | 	case *Scope:
142 | 		// predeclared object - nothing to do for now
143 | 	}
144 | 	return token.NoPos // no declaration matched (also reached when obj.Decl is nil)
145 | }
146 | 
147 | // ObjKind describes what an [Object] represents.
148 | type ObjKind int
149 | 
150 | // The list of possible [Object] kinds.
151 | const (
152 | 	Bad ObjKind = iota // for error handling
153 | 	Pkg                // package
154 | 	Con                // constant
155 | 	Typ                // type
156 | 	Var                // variable
157 | 	Fun                // function or method
158 | 	Lbl                // label
159 | )
160 | 
161 | // objKindStrings holds the display name of every ObjKind, indexed by kind.
162 | var objKindStrings = [...]string{
163 | 	Bad: "bad",
164 | 	Pkg: "package",
165 | 	Con: "const",
166 | 	Typ: "type",
167 | 	Var: "var",
168 | 	Fun: "func",
169 | 	Lbl: "label",
170 | }
171 | 
172 | // String returns the display name of kind. A value outside the declared
173 | // range is reported as "bad" instead of panicking on the table lookup.
174 | func (kind ObjKind) String() string {
175 | 	if kind < 0 || int(kind) >= len(objKindStrings) {
176 | 		return objKindStrings[Bad]
177 | 	}
178 | 	return objKindStrings[kind]
179 | }
172 | 


--------------------------------------------------------------------------------
/internal/os/elf/reader.go:
--------------------------------------------------------------------------------
  1 | package elf
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"encoding/binary"
  6 | 	"io"
  7 | 	"os"
  8 | 	"strings"
  9 | )
 10 | 
 11 | type bytesReader struct { // bytesReader is a cursor over an in-memory byte slice
 12 | 	buf   []byte           // backing bytes being read
 13 | 	r, e  int              // r: index of the next byte to read; e: len(buf) as set by NewReader
 14 | 	order binary.ByteOrder // byte order used to decode multi-byte integers
 15 | }
 16 | // BytesReader is the pointer form of bytesReader used throughout the package.
 17 | type BytesReader = *bytesReader
 18 | 
 19 | // Byte returns the byte under the cursor and then moves the cursor forward by one.
 20 | func (r *bytesReader) Byte() byte {
 21 | 	b := r.buf[r.r] // panics past the end of buf; the cursor is then left where it was
 22 | 	r.r++
 23 | 	return b
 24 | }
 25 | 
 26 | // Uint16 decodes two bytes at the cursor with r's byte order and advances the cursor by two.
 27 | func (r *bytesReader) Uint16() uint16 {
 28 | 	v := r.order.Uint16(r.buf[r.r : r.r+2]) // panics on a short buffer; the cursor is then left where it was
 29 | 	r.r += 2
 30 | 	return v
 31 | }
 32 | 
 33 | // Uint32 decodes four bytes at the cursor with r's byte order and advances the cursor by four.
 34 | func (r *bytesReader) Uint32() uint32 {
 35 | 	v := r.order.Uint32(r.buf[r.r : r.r+4]) // panics on a short buffer; the cursor is then left where it was
 36 | 	r.r += 4
 37 | 	return v
 38 | }
 39 | 
 40 | // Uint64 decodes eight bytes at the cursor with r's byte order and advances the cursor by eight.
 41 | func (r *bytesReader) Uint64() uint64 {
 42 | 	v := r.order.Uint64(r.buf[r.r : r.r+8]) // panics on a short buffer; the cursor is then left where it was
 43 | 	r.r += 8
 44 | 	return v
 45 | }
 46 | 
 47 | // UintAuto reads a 32-bit value when bits is 1 or a 64-bit value when bits is 2
 48 | // and returns it widened to uint64. Any other selector panics.
 49 | func (r *bytesReader) UintAuto(bits int) uint64 {
 50 | 	switch bits {
 51 | 	case 1:
 52 | 		return uint64(r.Uint32())
 53 | 	case 2:
 54 | 		return r.Uint64()
 55 | 	}
 56 | 	panic("不支持的系统位数！")
 57 | }
 55 | 
 56 | func (r *bytesReader) Offset(index int) { // Offset moves the read cursor to the absolute position index
 57 | 	r.r = index // stored as given; no bounds check happens here
 58 | }
 59 | 
 60 | // Data returns buf[begin:begin+length] without copying and without moving
 61 | // the cursor; the slice expression panics when the range is out of bounds.
 62 | func (r *bytesReader) Data(begin, length int) []byte {
 63 | 	end := begin + length
 64 | 	return r.buf[begin:end]
 65 | }
 66 | 
 67 | // Party builds a separate reader (cursor at 0, same byte order) over
 68 | // buf[begin:begin+length]; the bytes are shared with r, not copied.
 69 | func (r *bytesReader) Party(begin, length int) BytesReader {
 70 | 	window := r.buf[begin : begin+length]
 71 | 	return NewReader(window, r.order)
 72 | }
 73 | 
 74 | // NewReader wraps data in a reader whose cursor starts at 0 and which
 75 | // decodes multi-byte integers with the byte order passed as reader.
 76 | func NewReader(data []byte, reader binary.ByteOrder) BytesReader {
 77 | 	br := new(bytesReader) // r starts at its zero value, 0
 78 | 	br.buf, br.e = data, len(data)
 79 | 	br.order = reader
 80 | 	return br
 81 | }
 82 | 
 83 | // ObjectRead decodes one fixed-size T at r's cursor using r's byte order; the cursor advances only on success.
 84 | func ObjectRead[T any](r BytesReader) (*T, error) {
 85 | 	ret := new(T)
 86 | 	if err := binary.Read(bytes.NewReader(r.buf[r.r:]), r.order, ret); err != nil {
 87 | 		return nil, err // cursor untouched and no half-filled value handed out
 88 | 	}
 89 | 	r.r += binary.Size(*ret) // encoded size of T; non-negative once Read has accepted the type
 90 | 	return ret, nil
 91 | }
 92 | 
 93 | // ReadElf 打开 ELF 文件, 需要记录端序
 94 | func ReadElf(file string) (*File, error) {
 95 | 	elf := &File{Name: file}
 96 | 	f, err := os.Open(file)
 97 | 	d, err := io.ReadAll(f)
 98 | 	println(d)
 99 | 	os.Chmod(file, 0666)
100 | 	data, err := os.ReadFile(file)
101 | 	if err != nil {
102 | 		return nil, err
103 | 	}
104 | 	magic := Elf_Magic(data[:EI_NIDENT])
105 | 
106 | 	reader := NewReader(data, magic.Endian())
107 | 	elf.Reader = reader
108 | 
109 | 	elf.Ehdr, err = ObjectRead[Elf32_Ehdr](reader) // 前16位 magic 也读
110 | 	if err != nil {
111 | 		return nil, err
112 | 	}
113 | 
114 | 	// -------------------------------------------
115 | 	// 先解析段表字符串信息
116 | 	// -------------------------------------------
117 | 	offset := int(elf.Ehdr.Shoff)
118 | 	shentsize := int(elf.Ehdr.Shentsize)
119 | 	off := offset + int(elf.Ehdr.Shstrndx)*shentsize
120 | 	next := reader.Party(off, shentsize) // 这里需要解析为指定数据结构
121 | 	// 这个是表头， 记录字符串信息的
122 | 	shstrtab, err := ObjectRead[Elf32_Shdr](next)
123 | 	if err != nil {
124 | 		return nil, err
125 | 	}
126 | 	shstrTabData := reader.Data(int(shstrtab.Offset), int(shstrtab.Size))
127 | 	elf.Shstrtab = shstrTabData
128 | 	elf.ShstrtabSize = int(shstrtab.Size)
129 | 
130 | 	// -------------------------------------------
131 | 	// 解析段表
132 | 	// -------------------------------------------
133 | 	// 读取完整段表
134 | 	shdrTab := make(map[string]*Elf32_Shdr, int(elf.Ehdr.Shnum))
135 | 	shdrNames := make([]string, int(elf.Ehdr.Shnum))
136 | 	for index := 0; index < int(elf.Ehdr.Shnum); index++ {
137 | 		begin := offset + index*shentsize
138 | 		next = reader.Party(begin, shentsize)
139 | 		shdr, err := ObjectRead[Elf32_Shdr](next)
140 | 		if err != nil {
141 | 			return nil, err
142 | 		}
143 | 		name := StringTableName(shstrTabData, shdr.Name)
144 | 		shdrTab[name] = shdr
145 | 		shdrNames[index] = name
146 | 		//if name == "" { //删除空段表项
147 | 		//	shdrTab[name] = nil
148 | 		//} else {
149 | 		//	shdrTab[name] = shdr
150 | 		//}
151 | 	}
152 | 	elf.ShdrTab = shdrTab
153 | 	elf.ShdrNames = shdrNames
154 | 
155 | 	strTab := shdrTab[".strtab"]
156 | 	strTabData := reader.Data(int(strTab.Offset), int(strTab.Size))
157 | 	elf.Strtab = strTabData
158 | 	elf.StrtabSize = int(strTab.Size)
159 | 
160 | 	symTab := shdrTab[".symtab"]
161 | 	symTabSize := 16                           // todo 这个表达式不正确 2 ^ int(symTab.Entsize)      // 16
162 | 	symTabLen := int(symTab.Size) / symTabSize // ➗ 16
163 | 	symTabList := make(map[string]*Elf32_Sym, symTabLen)
164 | 	symNames := make([]string, symTabLen)
165 | 	for i := 0; i < symTabLen; i++ {
166 | 		begin := int(symTab.Offset) + i*symTabSize
167 | 		next = reader.Party(begin, symTabSize)
168 | 		sym, err := ObjectRead[Elf32_Sym](next)
169 | 		if err != nil {
170 | 			return nil, err
171 | 		}
172 | 		name := StringTableName(strTabData, sym.Name)
173 | 		symNames[i] = name
174 | 		symTabList[name] = sym
175 | 		//if name == "" { //无名符号，对于链接没有意义,按照链接器设计需要记录全局和局部符号，避免名字冲突
176 | 		//	symTabList[name] = nil
177 | 		//} else {
178 | 		//	symTabList[name] = sym //加入符号表
179 | 		//}
180 | 	}
181 | 	elf.SymTab = symTabList
182 | 	elf.SymNames = symNames
183 | 
184 | 	elf.RelTab = make([]*Elf32_RelInfo, 0)
185 | 	for name, relTab := range shdrTab { //所有段的重定位项整合
186 | 		if strings.HasPrefix(name, ".rel") { // 重定位段
187 | 			relTabLen := int(relTab.Size) / 8
188 | 			for i := 0; i < relTabLen; i++ {
189 | 				begin := int(relTab.Offset) + i*8
190 | 				next = reader.Party(begin, 8)
191 | 				rel, err := ObjectRead[Elf32_Rel](next)
192 | 				if err != nil {
193 | 					return nil, err
194 | 				}
195 | 				sym := symNames[int(rel.Info>>8)]
196 | 				relName := StringTableName(strTabData, symTabList[sym].Name)
197 | 				elf.RelTab = append(elf.RelTab, &Elf32_RelInfo{
198 | 					SegName: name[4:],
199 | 					Rel:     rel,
200 | 					RelName: relName,
201 | 				})
202 | 			}
203 | 		}
204 | 	}
205 | 
206 | 	return elf, nil
207 | }
208 | 


--------------------------------------------------------------------------------
/compiler/assemble/internal/token.go:
--------------------------------------------------------------------------------
  1 | package internal
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"strconv"
  6 | )
  7 | 
  8 | // Token identifies one kind of lexical token used by the assembler.
  9 | type Token int
 10 | 
 11 | const (
 12 | 	ILLEGAL Token = iota // invalid token
 13 | 	EOF                  // end of input
 14 | 	COMMENT              // comment
 15 | 
 16 | 	_literal    // start marker of the literal group
 17 | 	IDENT       // main
 18 | 	INT         // integer literal
 19 | 	FLOAT       // floating-point literal
 20 | 	STRING      // string literal
 21 | 	_literalEnd // end marker of the literal group
 22 | 
 23 | 	_operator    // start marker of the operator group
 24 | 	ADD          // +
 25 | 	SUB          // -
 26 | 	LBRACK       // [
 27 | 	COMMA        // ,
 28 | 	RBRACK       // ]
 29 | 	COLON        // :
 30 | 	_operatorEnd // end marker of the operator group
 31 | 
 32 | 	_keywords // start marker of the keyword group
 33 | 	BREAK     // break
 34 | 	CHAR      // char
 35 | 	CONTINUE  // continue
 36 | 	ELSE      // else
 37 | 	EXTERN    // extern
 38 | 	IF        // if
 39 | 	IN        // in
 40 | 	KW_INT    // int keyword; renamed because INT is already the integer-literal class
 41 | 	OUT       // out
 42 | 	RETURN    // return
 43 | 	VOID      // void
 44 | 	WHILE     // while
 45 | 	_keywordsEnd
 46 | 
 47 | 	// registers
 48 | 	BR_AL
 49 | 	BR_CL
 50 | 	BR_DL
 51 | 	BR_BL
 52 | 	BR_AH
 53 | 	BR_CH
 54 | 	BR_DH
 55 | 	BR_BH
 56 | 	DR_EAX
 57 | 	DR_ECX
 58 | 	DR_EDX
 59 | 	DR_EBX
 60 | 	DR_ESP
 61 | 	DR_EBP
 62 | 	DR_ESI
 63 | 	DR_EDI
 64 | 	// two-operand instructions
 65 | 	I_MOV
 66 | 	I_CMP
 67 | 	I_SUB
 68 | 	I_ADD
 69 | 	I_LEA
 70 | 	// one-operand instructions
 71 | 	I_CALL
 72 | 	I_INT
 73 | 	I_IMUL
 74 | 	I_IDIV
 75 | 	I_NEG
 76 | 	I_INC
 77 | 	I_DEC
 78 | 	I_JMP
 79 | 	I_JE
 80 | 	I_JG
 81 | 	I_JL
 82 | 	I_JGE
 83 | 	I_JLE
 84 | 	I_JNE
 85 | 	I_JNA
 86 | 	I_PUSH
 87 | 	I_POP
 88 | 	// zero-operand instructions
 89 | 	I_RET
 90 | 	// assembler directives
 91 | 	K_SEC
 92 | 	K_GLB
 93 | 	K_EQU
 94 | 	K_TIMES
 95 | 	K_DB
 96 | 	K_DW
 97 | 	K_DD
 98 | 
 99 | 	// data-definition directives
100 | 	K_BYTE   = iota + _literal + 1 // .byte; the added offset leaves a gap of unused values after K_DD
101 | 	K_WORD                         // .word
102 | 	K_LONG                         // .long
103 | 	K_QUAD                         // .quad
104 | 	K_FLOAT                        // .float
105 | 	K_DOUBLE                       // .double
106 | 	K_ASCII                        // .ascii
107 | 	K_ASCIZ                        // .asciz
108 | 	K_STRING                       // .string
109 | 	K_REPT                         // .rept
110 | 	K_ENDR                         // .endr
111 | 
112 | 	// section-definition directives
113 | 	K_DATA    = iota + _literal + 1 // .data
114 | 	K_TEXT                          // .text
115 | 	K_BSS                           // .bss
116 | 	K_SECTION                       // .section
117 | 	K_GLOBAL                        // .global
118 | 	K_LOCAL                         // .local
119 | 	K_ALIGN                         // .align
120 | 	K_SKIP                          // .skip
121 | 	K_SPACE                         // .space
122 | )
123 | 
124 | // tokens holds the display string of the special, literal, operator and keyword tokens, indexed by Token.
125 | var tokens = [...]string{
126 | 	ILLEGAL:  "ILLEGAL",
127 | 	EOF:      "EOF",
128 | 	COMMENT:  "COMMENT",
129 | 	IDENT:    "IDENT",
130 | 	INT:      "INT",
131 | 	FLOAT:    "FLOAT",
132 | 	STRING:   "STRING",
133 | 	ADD:      "+",
134 | 	SUB:      "-",
135 | 	LBRACK:   "[",
136 | 	COMMA:    ",",
137 | 	RBRACK:   "]",
138 | 	COLON:    ":",
139 | 	BREAK:    "break",
140 | 	CHAR:     "char",
141 | 	CONTINUE: "continue",
142 | 	ELSE:     "else",
143 | 	EXTERN:   "extern",
144 | 	IF:       "if",
145 | 	IN:       "in",
146 | 	KW_INT:   "int",
147 | 	OUT:      "out",
148 | 	RETURN:   "return",
149 | 	VOID:     "void",
150 | 	WHILE:    "while",
151 | }
152 | 
153 | // tokenNames repeats the entries of tokens and adds the dot-directive tokens; every key appears once.
154 | var tokenNames = map[Token]string{
155 | 	ILLEGAL:  "ILLEGAL",
156 | 	EOF:      "EOF",
157 | 	COMMENT:  "COMMENT",
158 | 	IDENT:    "IDENT",
159 | 	INT:      "INT",
160 | 	FLOAT:    "FLOAT",
161 | 	STRING:   "STRING",
162 | 	ADD:      "+",
163 | 	SUB:      "-",
164 | 	LBRACK:   "[",
165 | 	COMMA:    ",",
166 | 	RBRACK:   "]",
167 | 	COLON:    ":",
168 | 	BREAK:    "break",
169 | 	CHAR:     "char",
170 | 	CONTINUE: "continue",
171 | 	ELSE:     "else",
172 | 	EXTERN:   "extern",
173 | 	IF:       "if",
174 | 	IN:       "in",
175 | 	KW_INT:   "int",
176 | 	OUT:      "out",
177 | 	RETURN:   "return",
178 | 	VOID:     "void",
179 | 	WHILE:    "while",
180 | 
181 | 	K_BYTE:    ".byte",
182 | 	K_WORD:    ".word",
183 | 	K_LONG:    ".long",
184 | 	K_QUAD:    ".quad",
185 | 	K_FLOAT:   ".float",
186 | 	K_DOUBLE:  ".double",
187 | 	K_ASCII:   ".ascii",
188 | 	K_ASCIZ:   ".asciz",
189 | 	K_STRING:  ".string",
190 | 	K_REPT:    ".rept",
191 | 	K_ENDR:    ".endr",
192 | 	K_DATA:    ".data",
193 | 	K_TEXT:    ".text",
194 | 	K_BSS:     ".bss",
195 | 	K_SECTION: ".section",
196 | 	K_GLOBAL:  ".global",
197 | 	K_LOCAL:   ".local",
198 | 	K_ALIGN:   ".align",
199 | 	K_SKIP:    ".skip",
200 | 	K_SPACE:   ".space",
201 | }
202 | 
203 | // String returns the text for tok from tokens, else from tokenNames (directives), else "token(N)".
204 | func (tok Token) String() string {
205 | 	if 0 <= tok && tok < Token(len(tokens)) && tokens[tok] != "" {
206 | 		return tokens[tok]
207 | 	}
208 | 	if name, ok := tokenNames[tok]; ok {
209 | 		return name
210 | 	}
211 | 	return "token(" + strconv.Itoa(int(tok)) + ")"
212 | }
213 | 
214 | // Message renders tok for diagnostics: numeric and string literals are shown
215 | // with their source text id, identifiers as id itself, anything else via String.
216 | func (tok Token) Message(id string) string {
217 | 	switch tok {
218 | 	case INT, FLOAT:
219 | 		return fmt.Sprintf("number:%s", id)
220 | 	case STRING:
221 | 		return fmt.Sprintf("string:%s", id)
222 | 	case IDENT:
223 | 		return id
224 | 	}
225 | 	return tok.String()
226 | }
224 | 
225 | var keywordsList = []string{ // spellings recognised by Keywords and Lookup; index-aligned with keywordsTable
226 | 	"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
227 | 	"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
228 | 	"mov", "cmp", "sub", "add", "lea",
229 | 	"call", "int", "imul", "idiv", "neg", "inc", "dec", "jmp", "je", "jg", "jl", "jge", "jle", "jne", "jna", "push", "pop",
230 | 	"ret",
231 | 	"section", "global", "equ", "times", "db", "dw", "dd",
232 | 	"text", "data", "bss", // section names
233 | }
234 | var keywordsTable = []Token{ // token for the keywordsList entry at the same index
235 | 	BR_AL, BR_CL, BR_DL, BR_BL, BR_AH, BR_CH, BR_DH, BR_BH,
236 | 	DR_EAX, DR_ECX, DR_EDX, DR_EBX, DR_ESP, DR_EBP, DR_ESI, DR_EDI,
237 | 	I_MOV, I_CMP, I_SUB, I_ADD, I_LEA,
238 | 	I_CALL, I_INT, I_IMUL, I_IDIV, I_NEG, I_INC, I_DEC, I_JMP, I_JE, I_JG, I_JL, I_JGE, I_JLE, I_JNE, I_JNA, I_PUSH, I_POP,
239 | 	I_RET,
240 | 	K_SEC, K_GLB, K_EQU, K_TIMES, K_DB, K_DW, K_DD,
241 | 	IDENT, IDENT, IDENT, // section names are handled as identifiers
242 | }
243 | 
244 | // Keywords returns the token paired with ident in keywordsList and true, or ILLEGAL and false when ident is not listed.
245 | func Keywords(ident string) (Token, bool) {
246 | 	for i := range keywordsList {
247 | 		if keywordsList[i] == ident {
248 | 			return keywordsTable[i], true
249 | 		}
250 | 	}
251 | 	return ILLEGAL, false
252 | }
252 | 
253 | // Lookup returns the token paired with ident in keywordsList, or IDENT
254 | // when ident is not listed there.
255 | func Lookup(ident string) Token {
256 | 	if tok, found := Keywords(ident); found {
257 | 		return tok
258 | 	}
259 | 	return IDENT
260 | }
261 | 
262 | // IsLiteral reports whether tok lies strictly between the _literal and _literalEnd markers.
263 | func (tok Token) IsLiteral() bool { return tok > _literal && tok < _literalEnd }
263 | 


--------------------------------------------------------------------------------
/compiler/compile/parser/params.go:
--------------------------------------------------------------------------------
  1 | package parser
  2 | 
  3 | import (
  4 | 	"github.com/facelang/face/compiler/compile/tokens"
  5 | 	"github.com/facelang/face/internal/prog"
  6 | )
  7 | 
  8 | // const a int = 1 // byte, int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool, string,
  9 | // let b array<int> = [1,2,3]
 10 | // let b map<int, string> = {1: "a", 2: "b", 3: "c"}
 11 | // let c classA = {a: 1, b: 2, c: 3}
 12 | 
 13 | // ParamOrNil = [ IdentifierList ] [ "..." ] Type . Called only while parsing a parameter list, with follow set to the list's closing token.
 14 | func ParamOrNil(p *parser, name *prog.Name, follow token.Token) *prog.Field {
 15 | 	// NOTE(review): this body reads both p.token and p.tok and mixes token./prog./unqualified names; confirm which parser API is current.
 16 | 	pos := p.FilePos
 17 | 	if name != nil {
 18 | 		pos = name.Pos()
 19 | 	}
 20 | 
 21 | 	f := new(prog.Field)
 22 | 	f.pos = pos
 23 | 
 24 | 	if p.token == token.IDENT || name != nil {
 25 | 		// name
 26 | 		if name == nil {
 27 | 			name = p.name()
 28 | 		}
 29 | 
 30 | 		if p.tok == _Dot { // name.***
 31 | 			// name "." ...
 32 | 			f.Type = p.qualifiedName(name)
 33 | 			if typeSetsOk && p.tok == _Operator && p.op == Or {
 34 | 				// name "." name "|" ...
 35 | 				f = p.embeddedElem(f)
 36 | 			}
 37 | 			return f
 38 | 		}
 39 | 
 40 | 		if typeSetsOk && p.tok == _Operator && p.op == Or {
 41 | 			// name "|" ...
 42 | 			f.Type = name
 43 | 			return p.embeddedElem(f)
 44 | 		}
 45 | 
 46 | 		f.Name = name
 47 | 	}
 48 | 
 49 | 	if p.token == prog.DotsType {
 50 | 		// [name] "..." ...
 51 | 		t := new(DotsType)
 52 | 		t.pos = p.pos()
 53 | 		p.next()
 54 | 		t.Elem = p.typeOrNil()
 55 | 		if t.Elem == nil {
 56 | 			t.Elem = p.badExpr()
 57 | 			p.syntaxError("... is missing type")
 58 | 		}
 59 | 		f.Type = t
 60 | 		return f
 61 | 	}
 62 | 
 63 | 	if typeSetsOk && p.tok == _Operator && p.op == Tilde {
 64 | 		// [name] "~" ...
 65 | 		f.Type = p.embeddedElem(nil).Type
 66 | 		return f
 67 | 	}
 68 | 
 69 | 	f.Type = p.typeOrNil()
 70 | 	if typeSetsOk && p.tok == _Operator && p.op == Or && f.Type != nil {
 71 | 		// [name] type "|"
 72 | 		f = p.embeddedElem(f)
 73 | 	}
 74 | 	if f.Name != nil || f.Type != nil {
 75 | 		return f
 76 | 	}
 77 | 	// Neither a name nor a type was found: report it, resynchronise, and return nil.
 78 | 	p.syntaxError("expected " + tokstring(follow))
 79 | 	p.advance(_Comma, follow)
 80 | 	return nil
 81 | }
 82 | 
 83 | // Parameters    = "(" [ ParameterList [ "," ] ] ")" .
 84 | // ParameterList = ParameterDecl { "," ParameterDecl } .
 85 | //
 86 | // paramList parses the parameter declarations up to and including the closing
 87 | // token close; the opening "(" or "[" has already been consumed. requireNames
 88 | // is set for type parameter lists, where every entry must have a name.
 89 | // In the result list, either all fields have a name, or no field has a name.
 90 | // NOTE(review): par is *prog.Field while list is []*Field; confirm Field aliases prog.Field.
 91 | func paramList(p *parser, close token.Token, requireNames bool) (list []*Field) {
 92 | 
 93 | 	var named int // number of parameters that have an explicit name and type
 94 | 	var typed int // number of parameters that have an explicit type
 95 | 	end := p.list("parameter list", COMMA, close, func() bool {
 96 | 		// No name has been read ahead, so every parameter starts from scratch;
 97 | 		// close is what ParamOrNil reports as expected when nothing parses.
 98 | 		par := ParamOrNil(p, nil, close)
 99 | 		if par != nil {
100 | 			if debug && par.Name == nil && par.Type == nil {
101 | 				panic("parameter without name or type")
102 | 			}
103 | 			if par.Name != nil && par.Type != nil {
104 | 				named++
105 | 			}
106 | 			if par.Type != nil {
107 | 				typed++
108 | 			}
109 | 			list = append(list, par)
110 | 		}
111 | 		// Never stop early: p.list ends the loop itself at close or EOF.
112 | 		return false
113 | 	})
114 | 
115 | 	// An empty list needs no name/type distribution.
116 | 	if len(list) == 0 {
117 | 		return
118 | 	}
119 | 
120 | 	// distribute parameter types (len(list) > 0)
121 | 	if named == 0 && !requireNames {
122 | 		// all unnamed and we're not in a type parameter list => found names are named types
123 | 		for _, par := range list {
124 | 			if typ := par.Name; typ != nil {
125 | 				par.Type = typ
126 | 				par.Name = nil
127 | 			}
128 | 		}
129 | 	} else if named != len(list) {
130 | 		// some named or we're in a type parameter list => all must be named
131 | 		var errPos Pos // left-most error position (or unknown)
132 | 		var typ Expr   // current type (from right to left)
133 | 		for i := len(list) - 1; i >= 0; i-- {
134 | 			par := list[i]
135 | 			if par.Type != nil {
136 | 				typ = par.Type
137 | 				if par.Name == nil {
138 | 					errPos = StartPos(typ)
139 | 					par.Name = NewName(errPos, "_")
140 | 				}
141 | 			} else if typ != nil {
142 | 				par.Type = typ
143 | 			} else {
144 | 				// par.Type == nil && typ == nil => we only have a par.Name
145 | 				errPos = par.Name.Pos()
146 | 				t := p.badExpr()
147 | 				t.pos = errPos // correct position
148 | 				par.Type = t
149 | 			}
150 | 		}
151 | 		if errPos.IsKnown() {
152 | 			// Not all parameters are named because named != len(list).
153 | 			// If named == typed, there must be parameters that have no types.
154 | 			// They must be at the end of the parameter list, otherwise types
155 | 			// would have been filled in by the right-to-left sweep above and
156 | 			// there would be no error.
157 | 			// If requireNames is set, the parameter list is a type parameter
158 | 			// list.
159 | 			var msg string
160 | 			if named == typed {
161 | 				errPos = end // position error at closing token ) or ]
162 | 				if requireNames {
163 | 					msg = "missing type constraint"
164 | 				} else {
165 | 					msg = "missing parameter type"
166 | 				}
167 | 			} else {
168 | 				if requireNames {
169 | 					msg = "missing type parameter name"
170 | 					// go.dev/issue/60812
171 | 					if len(list) == 1 {
172 | 						msg += " or invalid array length"
173 | 					}
174 | 				} else {
175 | 					msg = "missing parameter name"
176 | 				}
177 | 			}
178 | 			p.syntaxErrorAt(errPos, msg)
179 | 		}
180 | 	}
181 | 
182 | 	return
183 | }
184 | 
185 | // list parses elements separated by sep, calling f once per element, until close or EOF is seen or f returns true.
186 | // It then expects close and returns the position held just before it; a missing separator is reported and ends parsing early.
187 | func (p *parser) list(context string, sep, close token.Token, f func() bool) prog.FilePos {
188 | 	for stop := false; !stop && p.token != token.EOF && p.token != close; {
189 | 		stop = f()
190 | 		if p.got(sep) || p.token == close {
191 | 			continue
192 | 		}
193 | 		p.errorf("list for %s; missing %s or %s", context, sep, close)
194 | 		return p.FilePos
195 | 	}
196 | 
197 | 	end := p.FilePos
198 | 	p.expect(close)
199 | 	return end
200 | }
201 | 


--------------------------------------------------------------------------------
/docs/ascii.md:
--------------------------------------------------------------------------------
  1 | ### ASCII 码表
  2 | 
  3 | | 十进制  | 十六进制 | 字符 | 描述                        |
  4 | |------|----------|------|-----------------------------|
  5 | | 0    | 0x00     | NUL  | Null                        |
  6 | | 1    | 0x01     | SOH  | Start of Header             |
  7 | | 2    | 0x02     | STX  | Start of Text               |
  8 | | 3    | 0x03     | ETX  | End of Text                 |
  9 | | 4    | 0x04     | EOT  | End of Transmission         |
 10 | | 5    | 0x05     | ENQ  | Enquiry                     |
 11 | | 6    | 0x06     | ACK  | Acknowledge                 |
 12 | | 7    | 0x07     | BEL  | Bell                        |
 13 | | 8    | 0x08     | BS   | Backspace                   |
 14 | | 9    | 0x09     | TAB  | Horizontal Tab              |
 15 | | 10   | 0x0A     | LF   | Line Feed (New Line)        |
 16 | | 11   | 0x0B     | VT   | Vertical Tab                |
 17 | | 12   | 0x0C     | FF   | Form Feed (New Page)        |
 18 | | 13   | 0x0D     | CR   | Carriage Return             |
 19 | | 14   | 0x0E     | SO   | Shift Out                   |
 20 | | 15   | 0x0F     | SI   | Shift In                    |
 21 | | 16   | 0x10     | DLE  | Data Link Escape            |
 22 | | 17   | 0x11     | DC1  | Device Control 1 (XON)      |
 23 | | 18   | 0x12     | DC2  | Device Control 2            |
 24 | | 19   | 0x13     | DC3  | Device Control 3 (XOFF)     |
 25 | | 20   | 0x14     | DC4  | Device Control 4            |
 26 | | 21   | 0x15     | NAK  | Negative Acknowledge        |
 27 | | 22   | 0x16     | SYN  | Synchronous Idle            |
 28 | | 23   | 0x17     | ETB  | End of Transmit Block       |
 29 | | 24   | 0x18     | CAN  | Cancel                      |
 30 | | 25   | 0x19     | EM   | End of Medium               |
 31 | | 26   | 0x1A     | SUB  | Substitute                  |
 32 | | 27   | 0x1B     | ESC  | Escape                      |
 33 | | 28   | 0x1C     | FS   | File Separator              |
 34 | | 29   | 0x1D     | GS   | Group Separator             |
 35 | | 30   | 0x1E     | RS   | Record Separator            |
 36 | | 31   | 0x1F     | US   | Unit Separator              |
 37 | | 32   | 0x20     | SP   | Space                       |
 38 | | 33   | 0x21     | !    | Exclamation Mark            |
 39 | | 34   | 0x22     | "    | Double Quote                |
 40 | | 35   | 0x23     | #    | Number Sign (Hash)          |
 41 | | 36   | 0x24     | $    | Dollar Sign                 |
 42 | | 37   | 0x25     | %    | Percent Sign                |
 43 | | 38   | 0x26     | &    | Ampersand                   |
 44 | | 39   | 0x27     | '    | Single Quote                |
 45 | | 40   | 0x28     | (    | Left Parenthesis            |
 46 | | 41   | 0x29     | )    | Right Parenthesis           |
 47 | | 42   | 0x2A     | *    | Asterisk                    |
 48 | | 43   | 0x2B     | +    | Plus Sign                   |
 49 | | 44   | 0x2C     | ,    | Comma                       |
 50 | | 45   | 0x2D     | -    | Hyphen-Minus                |
 51 | | 46   | 0x2E     | .    | Period (Dot)                |
 52 | | 47   | 0x2F     | /    | Slash (Forward Slash)       |
 53 | | 48   | 0x30     | 0    | Digit 0                     |
 54 | | 49   | 0x31     | 1    | Digit 1                     |
 55 | | 50   | 0x32     | 2    | Digit 2                     |
 56 | | 51   | 0x33     | 3    | Digit 3                     |
 57 | | 52   | 0x34     | 4    | Digit 4                     |
 58 | | 53   | 0x35     | 5    | Digit 5                     |
 59 | | 54   | 0x36     | 6    | Digit 6                     |
 60 | | 55   | 0x37     | 7    | Digit 7                     |
 61 | | 56   | 0x38     | 8    | Digit 8                     |
 62 | | 57   | 0x39     | 9    | Digit 9                     |
 63 | | 58   | 0x3A     | :    | Colon                       |
 64 | | 59   | 0x3B     | ;    | Semicolon                   |
 65 | | 60   | 0x3C     | <    | Less-Than Sign              |
 66 | | 61   | 0x3D     | =    | Equals Sign                 |
 67 | | 62   | 0x3E     | >    | Greater-Than Sign           |
 68 | | 63   | 0x3F     | ?    | Question Mark               |
 69 | | 64   | 0x40     | @    | At Sign                     |
 70 | | 65   | 0x41     | A    | Uppercase A                 |
 71 | | 66   | 0x42     | B    | Uppercase B                 |
 72 | | 67   | 0x43     | C    | Uppercase C                 |
 73 | | 68   | 0x44     | D    | Uppercase D                 |
 74 | | 69   | 0x45     | E    | Uppercase E                 |
 75 | | 70   | 0x46     | F    | Uppercase F                 |
 76 | | 71   | 0x47     | G    | Uppercase G                 |
 77 | | 72   | 0x48     | H    | Uppercase H                 |
 78 | | 73   | 0x49     | I    | Uppercase I                 |
 79 | | 74   | 0x4A     | J    | Uppercase J                 |
 80 | | 75   | 0x4B     | K    | Uppercase K                 |
 81 | | 76   | 0x4C     | L    | Uppercase L                 |
 82 | | 77   | 0x4D     | M    | Uppercase M                 |
 83 | | 78   | 0x4E     | N    | Uppercase N                 |
 84 | | 79   | 0x4F     | O    | Uppercase O                 |
 85 | | 80   | 0x50     | P    | Uppercase P                 |
 86 | | 81   | 0x51     | Q    | Uppercase Q                 |
 87 | | 82   | 0x52     | R    | Uppercase R                 |
 88 | | 83   | 0x53     | S    | Uppercase S                 |
 89 | | 84   | 0x54     | T    | Uppercase T                 |
 90 | | 85   | 0x55     | U    | Uppercase U                 |
 91 | | 86   | 0x56     | V    | Uppercase V                 |
 92 | | 87   | 0x57     | W    | Uppercase W                 |
 93 | | 88   | 0x58     | X    | Uppercase X                 |
 94 | | 89   | 0x59     | Y    | Uppercase Y                 |
 95 | | 90   | 0x5A     | Z    | Uppercase Z                 |
 96 | | 91   | 0x5B     | [    | Left Square Bracket         |
 97 | | 92   | 0x5C     | \    | Backslash                   |
 98 | | 93   | 0x5D     | ]    | Right Square Bracket        |
 99 | | 94   | 0x5E     | ^    | Caret (Circumflex Accent)   |
100 | | 95   | 0x5F     | _    | Underscore                  |
101 | | 96   | 0x60     | `    | Grave Accent                |
102 | | 97   | 0x61     | a    | Lowercase a                 |
103 | | 98   | 0x62     | b    | Lowercase b                 |
104 | | 99   | 0x63     | c    | Lowercase c                 |
105 | | 100  | 0x64     | d    | Lowercase d                 |
106 | | 101  | 0x65     | e    | Lowercase e                 |
107 | | 102  | 0x66     | f    | Lowercase f                 |
108 | | 103  | 0x67     | g    | Lowercase g                 |
109 | | 104  | 0x68     | h    | Lowercase h                 |
110 | | 105  | 0x69     | i    | Lowercase i                 |
111 | | 106  | 0x6A     | j    | Lowercase j                 |
112 | | 107  | 0x6B     | k    | Lowercase k                 |
113 | | 108  | 0x6C     | l    | Lowercase l                 |
114 | | 109  | 0x6D     | m    | Lowercase m                 |
115 | | 110  | 0x6E     | n    | Lowercase n                 |
116 | | 111  | 0x6F     | o    | Lowercase o                 |
117 | | 112  | 0x70     | p    | Lowercase p                 |
118 | | 113  | 0x71     | q    | Lowercase q                 |
119 | | 114  | 0x72     | r    | Lowercase r                 |
120 | | 115  | 0x73     | s    | Lowercase s                 |
121 | | 116  | 0x74     | t    | Lowercase t                 |
122 | | 117  | 0x75     | u    | Lowercase u                 |
123 | | 118  | 0x76     | v    | Lowercase v                 |
124 | | 119  | 0x77     | w    | Lowercase w                 |
125 | | 120  | 0x78     | x    | Lowercase x                 |
126 | | 121  | 0x79     | y    | Lowercase y                 |
127 | | 122  | 0x7A     | z    | Lowercase z                 |
128 | | 123  | 0x7B     | {    | Left Curly Brace            |
129 | | 124  | 0x7C     | \|   | Vertical Bar                |
130 | | 125  | 0x7D     | }    | Right Curly Brace           |
131 | | 126  | 0x7E     | ~    | Tilde                       |
132 | | 127  | 0x7F     | DEL  | Delete                      |
133 | 
134 | 这张表包括了 ASCII 字符集中的所有标准字符，从控制字符到可打印字符。


--------------------------------------------------------------------------------
/compiler/compile/token/token.go:
--------------------------------------------------------------------------------
  1 | // Copyright 2009 The Go Authors. All rights reserved.
  2 | // Use of this source code is governed by a BSD-style
  3 | // license that can be found in the LICENSE file.
  4 | 
  5 | // Package token defines constants representing the lexical tokens of the Go
  6 | // programming language and basic operations on tokens (printing, predicates).
  7 | package token
  8 | 
  9 | import (
 10 | 	"strconv"
 11 | 	"unicode"
 12 | 	"unicode/utf8"
 13 | )
 14 | 
 15 | // Token is the set of lexical tokens of the Go programming language.
 16 | type Token int
 17 | 
 18 | // The list of tokens.
 19 | const (
 20 | 	// Special tokens
 21 | 	ILLEGAL Token = iota
 22 | 	EOF
 23 | 	COMMENT
 24 | 	NEWLINE
 25 | 
 26 | 	literal_beg
 27 | 	// Identifiers and basic type literals
 28 | 	// (these tokens stand for classes of literals)
 29 | 	IDENT  // main
 30 | 	INT    // 12345
 31 | 	FLOAT  // 123.45
 32 | 	IMAG   // 123.45i
 33 | 	CHAR   // 'a', a UTF-8 character
 34 | 	STRING // "abc"
 35 | 	literal_end
 36 | 
 37 | 	operator_beg
 38 | 	// Operators and delimiters
 39 | 	ADD // +
 40 | 	SUB // -
 41 | 	MUL // *
 42 | 	QUO // /
 43 | 	REM // %
 44 | 
 45 | 	AND     // &
 46 | 	OR      // |
 47 | 	XOR     // ^
 48 | 	SHL     // <<
 49 | 	SHR     // >>
 50 | 	AND_NOT // &^
 51 | 
 52 | 	ADD_ASSIGN // +=
 53 | 	SUB_ASSIGN // -=
 54 | 	MUL_ASSIGN // *=
 55 | 	QUO_ASSIGN // /=
 56 | 	REM_ASSIGN // %=
 57 | 
 58 | 	AND_ASSIGN     // &=
 59 | 	OR_ASSIGN      // |=
 60 | 	XOR_ASSIGN     // ^=
 61 | 	SHL_ASSIGN     // <<=
 62 | 	SHR_ASSIGN     // >>=
 63 | 	AND_NOT_ASSIGN // &^=
 64 | 
 65 | 	LAND  // &&
 66 | 	LOR   // ||
 67 | 	ARROW // <-
 68 | 	INC   // ++
 69 | 	DEC   // --
 70 | 
 71 | 	EQL    // ==
 72 | 	LSS    // <
 73 | 	GTR    // >
 74 | 	ASSIGN // =
 75 | 	NOT    // !
 76 | 
 77 | 	NEQ      // !=
 78 | 	LEQ      // <=
 79 | 	GEQ      // >=
 80 | 	DEFINE   // :=
 81 | 	ELLIPSIS // ...
 82 | 
 83 | 	LPAREN // (
 84 | 	LBRACK // [
 85 | 	LBRACE // {
 86 | 	COMMA  // ,
 87 | 	PERIOD // .
 88 | 
 89 | 	RPAREN    // )
 90 | 	RBRACK    // ]
 91 | 	RBRACE    // }
 92 | 	SEMICOLON // ;
 93 | 	COLON     // :
 94 | 	operator_end
 95 | 
 96 | 	keyword_beg
 97 | 	// Keywords
 98 | 	BREAK    // break
 99 | 	CASE     // case
100 | 	CHAN     // chan
101 | 	CONST    // const
102 | 	CONTINUE // continue
103 | 
104 | 	DEFAULT     // default
105 | 	DEFER       // defer
106 | 	ELSE        // else
107 | 	FALLTHROUGH // fallthrough
108 | 	FOR         // for
109 | 
110 | 	FUNC   // func
111 | 	GO     // go
112 | 	GOTO   // goto
113 | 	IF     // if
114 | 	IMPORT // import
115 | 	FROM   // from
116 | 
117 | 	INTERFACE // interface
118 | 	MAP       // map
119 | 	PACKAGE   // package
120 | 	RANGE     // range
121 | 	RETURN    // return
122 | 
123 | 	SELECT // select
124 | 	STRUCT // struct
125 | 	SWITCH // switch
126 | 	TYPE   // type
127 | 	LET    // let
128 | 	keyword_end
129 | 
130 | 	additional_beg
131 | 	// additional tokens, handled in an ad-hoc manner
132 | 	TILDE // ~
133 | 	additional_end
134 | )
135 | 
// tokens maps each Token constant to its printable form: the constant name
// for special tokens and literal classes, and the actual source spelling for
// operators, delimiters and keywords. Marker constants (such as keyword_beg)
// have no entry and therefore hold the empty string. The keyword entries are
// also used by init to build the keyword lookup map.
var tokens = [...]string{
	ILLEGAL: "ILLEGAL",

	EOF:     "EOF",
	COMMENT: "COMMENT",
	NEWLINE: "NEWLINE",

	IDENT:  "IDENT",
	INT:    "INT",
	FLOAT:  "FLOAT",
	IMAG:   "IMAG",
	CHAR:   "CHAR",
	STRING: "STRING",

	ADD: "+",
	SUB: "-",
	MUL: "*",
	QUO: "/",
	REM: "%",

	AND:     "&",
	OR:      "|",
	XOR:     "^",
	SHL:     "<<",
	SHR:     ">>",
	AND_NOT: "&^",

	ADD_ASSIGN: "+=",
	SUB_ASSIGN: "-=",
	MUL_ASSIGN: "*=",
	QUO_ASSIGN: "/=",
	REM_ASSIGN: "%=",

	AND_ASSIGN:     "&=",
	OR_ASSIGN:      "|=",
	XOR_ASSIGN:     "^=",
	SHL_ASSIGN:     "<<=",
	SHR_ASSIGN:     ">>=",
	AND_NOT_ASSIGN: "&^=",

	LAND:  "&&",
	LOR:   "||",
	ARROW: "<-",
	INC:   "++",
	DEC:   "--",

	EQL:    "==",
	LSS:    "<",
	GTR:    ">",
	ASSIGN: "=",
	NOT:    "!",

	NEQ:      "!=",
	LEQ:      "<=",
	GEQ:      ">=",
	DEFINE:   ":=",
	ELLIPSIS: "...",

	LPAREN: "(",
	LBRACK: "[",
	LBRACE: "{",
	COMMA:  ",",
	PERIOD: ".",

	RPAREN:    ")",
	RBRACK:    "]",
	RBRACE:    "}",
	SEMICOLON: ";",
	COLON:     ":",

	BREAK:    "break",
	CASE:     "case",
	CHAN:     "chan",
	CONST:    "const",
	CONTINUE: "continue",

	DEFAULT:     "default",
	DEFER:       "defer",
	ELSE:        "else",
	FALLTHROUGH: "fallthrough",
	FOR:         "for",

	FUNC:   "func",
	GO:     "go",
	GOTO:   "goto",
	IF:     "if",
	IMPORT: "import",
	FROM:   "from",

	INTERFACE: "interface",
	MAP:       "map",
	PACKAGE:   "package",
	RANGE:     "range",
	RETURN:    "return",

	SELECT: "select",
	STRUCT: "struct",
	SWITCH: "switch",
	TYPE:   "type",
	LET:    "let",

	TILDE: "~",
}
239 | 
240 | // String returns the string corresponding to the token tok.
241 | // For operators, delimiters, and keywords the string is the actual
242 | // token CHARacter sequence (e.g., for the token [ADD], the string is
243 | // "+"). For all other tokens the string corresponds to the token
244 | // constant name (e.g. for the token [IDENT], the string is "IDENT").
245 | func (tok Token) String() string {
246 | 	s := ""
247 | 	if 0 <= tok && tok < Token(len(tokens)) {
248 | 		s = tokens[tok]
249 | 	}
250 | 	if s == "" {
251 | 		s = "token(" + strconv.Itoa(int(tok)) + ")"
252 | 	}
253 | 	return s
254 | }
255 | 
// A set of constants for precedence-based expression parsing.
// Non-operators have lowest precedence, followed by operators
// starting with precedence 1 up to unary operators. The highest
// precedence serves as "catch-all" precedence for selector,
// indexing, and other operator and delimiter tokens.
const (
	LowestPrec  = 0 // non-operators
	UnaryPrec   = 6 // one above the tightest binary level returned by Precedence (5)
	HighestPrec = 7 // catch-all level, above UnaryPrec
)
266 | 
267 | // Precedence returns the operator precedence of the binary
268 | // operator op. If op is not a binary operator, the result
269 | // is LowestPrecedence.
270 | func (op Token) Precedence() int {
271 | 	switch op {
272 | 	case LOR: // ||
273 | 		return 1
274 | 	case LAND:
275 | 		return 2 // &&
276 | 	case EQL, NEQ, LSS, LEQ, GTR, GEQ:
277 | 		return 3 // 逻辑运算
278 | 	case ADD, SUB, OR, XOR:
279 | 		return 4 // +-|^
280 | 	case MUL, QUO, REM, SHL, SHR, AND, AND_NOT:
281 | 		return 5 // */% << >> & &^
282 | 	}
283 | 	return LowestPrec
284 | }
285 | 
286 | var keywords map[string]Token
287 | 
288 | func init() {
289 | 	keywords = make(map[string]Token, keyword_end-(keyword_beg+1))
290 | 	for i := keyword_beg + 1; i < keyword_end; i++ {
291 | 		keywords[tokens[i]] = i
292 | 	}
293 | }
294 | 
295 | // Lookup maps an identifier to its keyword token or [IDENT] (if not a keyword).
296 | func Lookup(ident string) Token {
297 | 	if tok, is_keyword := keywords[ident]; is_keyword {
298 | 		return tok
299 | 	}
300 | 	return IDENT
301 | }
302 | 
303 | // Predicates
304 | 
305 | // IsLiteral returns true for tokens corresponding to identifiers
306 | // and basic type literals; it returns false otherwise.
307 | func (tok Token) IsLiteral() bool { return literal_beg < tok && tok < literal_end }
308 | 
309 | // IsOperator returns true for tokens corresponding to operators and
310 | // delimiters; it returns false otherwise.
311 | func (tok Token) IsOperator() bool {
312 | 	return (operator_beg < tok && tok < operator_end) || tok == TILDE
313 | }
314 | 
315 | // IsKeyword returns true for tokens corresponding to keywords;
316 | // it returns false otherwise.
317 | func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end }
318 | 
// IsExported reports whether the first rune of name is an upper-case
// letter. The empty string is not exported.
func IsExported(name string) bool {
	first, _ := utf8.DecodeRuneInString(name)
	upper := unicode.IsUpper(first)
	return upper
}
324 | 
325 | // IsKeyword reports whether name is a Go keyword, such as "func" or "return".
326 | func IsKeyword(name string) bool {
327 | 	// TODO: opt: use a perfect hash function instead of a global map.
328 | 	_, ok := keywords[name]
329 | 	return ok
330 | }
331 | 
332 | // IsIdentifier reports whether name is a Go identifier, that is, a non-empty
333 | // string made up of letters, digits, and underscores, where the first CHARacter
334 | // is not a digit. Keywords are not identifiers.
335 | func IsIdentifier(name string) bool {
336 | 	if name == "" || IsKeyword(name) {
337 | 		return false
338 | 	}
339 | 	for i, c := range name {
340 | 		if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) {
341 | 			return false
342 | 		}
343 | 	}
344 | 	return true
345 | }
346 | 
347 | func TokenLabel(token Token, ident string) string {
348 | 	if token.IsLiteral() {
349 | 		return ident
350 | 	}
351 | 	return token.String()
352 | }
353 | 
354 | //type Token rune
355 | //
356 | //const (
357 | //	ILLEGAL Token = (1 << 7) - iota // error
358 | //	EOF                             // 结束
359 | //	COMMENT                         // 注释
360 | //	NEWLINE                         // \n 换行符
361 | //	IDENT                           // label
362 | //	INT                             // 123456
363 | //	FLOAT                           // 123.456
364 | //	IMAG                            // 123.1i 复数
365 | //	CHAR                            // ''
366 | //	STRING                          // "", ``
367 | //)
368 | //
369 | //LET NameTable = [...]string{
370 | //	ILLEGAL: "ILLEGAL",
371 | //
372 | //	EOF:     "EOF",
373 | //	COMMENT: "COMMENT",
374 | //	NEWLINE: "NEWLINE",
375 | //
376 | //	IDENT:  "IDENT",
377 | //	INT:    "INT",
378 | //	FLOAT:  "FLOAT",
379 | //	IMAG:   "IMAG",
380 | //	CHAR:   "CHAR",
381 | //	STRING: "STRING",
382 | //}
383 | //
384 | //func (token Token) String() (name string) {
385 | //	if token >= 0 && token <= ILLEGAL {
386 | //		name = NameTable[token]
387 | //	}
388 | //	if name == "" {
389 | //		name = "token(" + strconv.Itoa(int(token)) + ")"
390 | //	}
391 | //	return name
392 | //}
393 | //
394 | //func (token Token) Label(ident string) string {
395 | //	if token.IsLiteral() {
396 | //		return ident
397 | //	}
398 | //	return token.String()
399 | //}
400 | //
401 | //func (token Token) IsLiteral() bool { return IDENT <= token && token <= STRING }
402 | 


--------------------------------------------------------------------------------
/compiler/compile/parser/parser.go:
--------------------------------------------------------------------------------
  1 | package parser
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"github.com/facelang/face/compiler/compile/ast"
  6 | 	"github.com/facelang/face/compiler/compile/token"
  7 | )
  8 | 
// parser holds the state of a single parse. It embeds the lexer, so the
// lexer's fields and methods (for example NextToken) are available directly
// on the parser.
type parser struct {
	*lexer              // token source
	token   token.Token // current token
	literal string      // literal text associated with the current token
	exprLev int         // expression nesting level
	inRhs   bool        // whether the parser is inside a right-hand-side expression
	nestLev int         // recursion nesting counter
}
 17 | 
 18 | func (p *parser) next() {
 19 | 	for {
 20 | 		p.token = p.NextToken()
 21 | 		p.literal += p.identifier
 22 | 		if p.token == token.COMMENT {
 23 | 			continue
 24 | 		}
 25 | 		if p.token == token.NEWLINE {
 26 | 			continue
 27 | 		}
 28 | 		break
 29 | 	}
 30 | }
 31 | 
 32 | func (p *parser) got(token token.Token) bool {
 33 | 	if p.token == token {
 34 | 		p.next()
 35 | 		return true
 36 | 	}
 37 | 	return false
 38 | }
 39 | 
 40 | func (p *parser) error(pos token.Pos, msg string) {
 41 | 	//if p.errors.Len() > 10 {
 42 | 	//	panic(p.errors)
 43 | 	//}
 44 | 	//p.errors.Add(pos, msg)
 45 | 	panic(fmt.Errorf("%s:%d:%d: %s", pos, p.literal, pos, msg))
 46 | }
 47 | 
 48 | func (p *parser) errorf(format string, args ...interface{}) {
 49 | 	//p.errors.Add(p.pos, fmt.Sprintf(format, args...))
 50 | }
 51 | 
 52 | func (p *parser) expect(token token.Token) token.Pos {
 53 | 	pos := p.pos
 54 | 	if p.token != token {
 55 | 		p.unexpect(token.String())
 56 | 	}
 57 | 
 58 | 	p.next()
 59 | 	return pos
 60 | }
 61 | 
 62 | func (p *parser) unexpect(except string) {
 63 | 	found := token.TokenLabel(p.token, p.identifier)
 64 | 	p.errorf("except %s, found %s", except, found)
 65 | }
 66 | 
 67 | // ----------------------------------------------------------------------------
 68 | // Identifiers
 69 | 
 70 | // name = identifier .
 71 | func (p *parser) name() *ast.Name {
 72 | 	if p.token != token.IDENT {
 73 | 		p.unexpect("identifier")
 74 | 	}
 75 | 
 76 | 	n := new(ast.Name)
 77 | 	n.Pos = p.pos
 78 | 	n.Name = p.literal
 79 | 
 80 | 	p.next()
 81 | 	return n
 82 | }
 83 | 
 84 | // nameList = name { "," name } .
 85 | func (p *parser) nameList(name *ast.Name) []*ast.Name {
 86 | 	list := []*ast.Name{name}
 87 | 	for p.token == token.COMMA {
 88 | 		p.next()
 89 | 		list = append(list, p.name())
 90 | 	}
 91 | 	return list
 92 | }
 93 | 
 94 | // 参考 ES6 import {} from "" 语法
 95 | // 暂不支持解包，只支持两种语法：
 96 | // import name from ""
 97 | // import ""
 98 | func (p *parser) pkg() *ast.Package {
 99 | 	d := &ast.Package{Pos: p.expect(token.IMPORT)}
100 | 
101 | 	if p.token == token.IDENT {
102 | 		d.Name = p.literal
103 | 		p.expect(token.FROM)
104 | 	}
105 | 
106 | 	d.Path = p.literal
107 | 	return d
108 | }
109 | 
110 | // const name1, name2, ... type = val1, val2, ...
111 | // let name1, name2, ... type = val1, val2, ...
112 | func (p *parser) genDecl(require token.Token) ast.Decl {
113 | 	pos := p.expect(require)
114 | 
115 | 	names := p.nameList(p.name())
116 | 	var typ ast.Expr
117 | 	var values []ast.Expr
118 | 	if p.token != token.EOF && p.token != token.SEMICOLON && p.token != token.RPAREN {
119 | 		typ = p.tryIdentOrType()
120 | 		if p.token == token.ASSIGN {
121 | 			p.next()
122 | 			values = exprList(p, true)
123 | 		}
124 | 	}
125 | 
126 | 	return &ast.GenDecl{
127 | 		Pos:    pos,
128 | 		Token:  require,
129 | 		Names:  names,
130 | 		Type:   typ,
131 | 		Values: values,
132 | 	}
133 | }
134 | 
135 | func (p *parser) funcDecl() ast.Decl {
136 | 	pos := p.expect(token.FUNC)
137 | 	name := p.name()
138 | 
139 | 	// 参数列表，包括泛型参数
140 | 	_, params := p.parseParameters(true)
141 | 
142 | 	results := p.parseResult() // (...) 返回结果
143 | 
144 | 	var body *ast.BlockStmt
145 | 	switch p.token {
146 | 	case token.LBRACE: // {}
147 | 		body = p.parseBody()
148 | 	case token.ASSIGN:
149 | 	// todo 单行表达式
150 | 	default:
151 | 		// 第二种情况： func func2(a, b int) [int] = a + b
152 | 		// 第三种情况： const func3 = (a, b) => a + b
153 | 		// 				const func4 = func() {}
154 | 		//				const func5 = func4 别名
155 | 		panic("函数声明 func name(){} 或者 func name() = express")
156 | 	}
157 | 
158 | 	return &ast.FuncDecl{
159 | 		Pos:  pos,
160 | 		Name: name,
161 | 		Type: &ast.FuncType{
162 | 			Params:  params,
163 | 			Results: results,
164 | 		},
165 | 		Body: body,
166 | 	}
167 | }
168 | 
169 | // SourceFile = { ImportDecl ";" } { TopLevelDecl ";" } .
170 | func (p *parser) parseFile() *ast.File {
171 | 	f := new(ast.File)
172 | 
173 | 	prev := token.EOF
174 | 	for p.token != token.EOF {
175 | 		prev = p.token
176 | 
177 | 		switch p.token {
178 | 		case token.IMPORT:
179 | 			if prev != token.IMPORT {
180 | 				p.error(p.pos, "import 语法只能出现在文件头部！")
181 | 			}
182 | 			f.Imports = append(f.Imports, p.pkg())
183 | 		case token.CONST, token.LET:
184 | 			f.DeclList = append(f.DeclList, p.genDecl(p.token))
185 | 		case token.FUNC:
186 | 			p.next()
187 | 			f.DeclList = append(f.DeclList, p.funcDecl())
188 | 		default:
189 | 			p.error(p.pos, "顶层语法仅支持 const, let, type, func 关键字定义！")
190 | 		}
191 | 	}
192 | 
193 | 	return f
194 | }
195 | 
196 | func (p *parser) parseBody() *ast.BlockStmt {
197 | 	lbrace := p.expect(token.LBRACE) // {
198 | 	list := p.parseStmtList()
199 | 	rbrace := p.expect(token.RBRACE) // }
200 | 
201 | 	return &ast.BlockStmt{Lbrace: lbrace, List: list, Rbrace: rbrace}
202 | }
203 | 
// parseBlockStmt parses a block statement. It simply delegates to
// parseBody.
func (p *parser) parseBlockStmt() *ast.BlockStmt {
	return p.parseBody()
}
207 | 
208 | // gotAssign = "=" .
209 | func (p *parser) gotAssign() bool {
210 | 	if p.token == token.ASSIGN {
211 | 		p.next()
212 | 		return true
213 | 	}
214 | 	return false
215 | }
216 | 
217 | // block{}, case:, select case 会调用
218 | func (p *parser) parseStmtList() (list []ast.Stmt) {
219 | 	for p.token != token.CASE && p.token != token.DEFAULT && p.token != token.RBRACE && p.token != token.EOF {
220 | 		list = append(list, p.parseStmt())
221 | 	}
222 | 
223 | 	return
224 | }
225 | 
// parseStmt parses a single statement, dispatching on the current token.
// Tokens that cannot start a statement produce an ast.BadStmt after an
// error has been reported and the parser has skipped ahead via advance.
func (p *parser) parseStmt() (s ast.Stmt) {
	// Track the nesting depth for the duration of this call.
	defer decNestLev(incNestLev(p))

	switch p.token {
	case token.CONST, token.LET:
		s = &ast.DeclStmt{Decl: p.genDecl(p.token)}
	case
		token.IDENT, token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING, token.FUNC, token.LPAREN, // operands
		token.LBRACK, token.STRUCT, token.MAP, token.CHAN, token.INTERFACE, // composite types
		token.ADD, token.SUB, token.MUL, token.AND, token.XOR, token.ARROW, token.NOT: // unary operators
		s, _ = p.parseSimpleStmt(labelOk)
		// because of the required look-ahead, labeled statements are
		// parsed by parseSimpleStmt - don't expect a semicolon after
		// them
		if _, isLabeledStmt := s.(*ast.LabeledStmt); !isLabeledStmt {
			p.expectSemi()
		}
	case token.RETURN:
		s = p.parseReturnStmt()
	case token.BREAK, token.CONTINUE, token.GOTO, token.FALLTHROUGH:
		s = p.parseBranchStmt(p.token)
		// TODO: nested block statements ({ {} }) still need to be handled.
	case token.IF:
		s = p.parseIfStmt()
	case token.SWITCH:
		s = p.parseSwitchStmt()
	case token.FOR:
		s = p.parseForStmt()
	case token.SEMICOLON:
		// Is it ever possible to have an implicit semicolon
		// producing an empty statement in a valid program?
		// (handle correctly anyway)
		s = &ast.EmptyStmt{Semicolon: p.pos, Implicit: p.lit == "\n"}
		p.next()
	case token.RBRACE:
		// a semicolon may be omitted before a closing "}"
		// (the "}" itself is not consumed here)
		s = &ast.EmptyStmt{Semicolon: p.pos, Implicit: true}
	default:
		// no statement found
		pos := p.pos
		p.errorExpected(pos, "statement")
		p.advance(stmtStart)
		s = &ast.BadStmt{From: pos, To: p.pos}
	}

	return
}
273 | 
274 | // ----------------------------------------------------------------------------
275 | // Statements
276 | 
// Parsing modes for parseSimpleStmt.
const (
	basic   = iota // no special forms: neither labels nor range clauses
	labelOk        // "ident :" may introduce a labeled statement
	rangeOk        // an assignment may have a "range x" right-hand side
)
283 | 
284 | // parseSimpleStmt returns true as 2nd result if it parsed the assignment
285 | // of a range clause (with mode == rangeOk). The returned statement is an
286 | // assignment with a right-hand side that is a single unary expression of
287 | // the form "range x". No guarantees are given for the left-hand side.
288 | func (p *parser) parseSimpleStmt(mode int) (ast.Stmt, bool) {
289 | 	if p.trace {
290 | 		defer un(trace(p, "SimpleStmt"))
291 | 	}
292 | 
293 | 	x := p.parseList(false)
294 | 
295 | 	switch p.tok {
296 | 	case
297 | 		token.DEFINE, token.ASSIGN, token.ADD_ASSIGN,
298 | 		token.SUB_ASSIGN, token.MUL_ASSIGN, token.QUO_ASSIGN,
299 | 		token.REM_ASSIGN, token.AND_ASSIGN, token.OR_ASSIGN,
300 | 		token.XOR_ASSIGN, token.SHL_ASSIGN, token.SHR_ASSIGN, token.AND_NOT_ASSIGN:
301 | 		// assignment statement, possibly part of a range clause
302 | 		pos, tok := p.pos, p.tok
303 | 		p.next()
304 | 		var y []ast.Expr
305 | 		isRange := false
306 | 		if mode == rangeOk && p.tok == token.RANGE && (tok == token.DEFINE || tok == token.ASSIGN) {
307 | 			pos := p.pos
308 | 			p.next()
309 | 			y = []ast.Expr{&ast.UnaryExpr{OpPos: pos, Op: token.RANGE, X: p.parseRhs()}}
310 | 			isRange = true
311 | 		} else {
312 | 			y = p.parseList(true)
313 | 		}
314 | 		return &ast.AssignStmt{Lhs: x, TokPos: pos, Tok: tok, Rhs: y}, isRange
315 | 	}
316 | 
317 | 	if len(x) > 1 {
318 | 		p.errorExpected(x[0].Pos(), "1 expression")
319 | 		// continue with first expression
320 | 	}
321 | 
322 | 	switch p.tok {
323 | 	case token.COLON:
324 | 		// labeled statement
325 | 		colon := p.pos
326 | 		p.next()
327 | 		if label, isIdent := x[0].(*ast.Ident); mode == labelOk && isIdent {
328 | 			// Go spec: The scope of a label is the body of the function
329 | 			// in which it is declared and excludes the body of any nested
330 | 			// function.
331 | 			stmt := &ast.LabeledStmt{Label: label, Colon: colon, Stmt: p.parseStmt()}
332 | 			return stmt, false
333 | 		}
334 | 		// The label declaration typically starts at x[0].Pos(), but the label
335 | 		// declaration may be erroneous due to a token after that position (and
336 | 		// before the ':'). If SpuriousErrors is not set, the (only) error
337 | 		// reported for the line is the illegal label error instead of the token
338 | 		// before the ':' that caused the problem. Thus, use the (latest) colon
339 | 		// position for error reporting.
340 | 		p.error(colon, "illegal label declaration")
341 | 		return &ast.BadStmt{From: x[0].Pos(), To: colon + 1}, false
342 | 
343 | 	case token.ARROW:
344 | 		// send statement
345 | 		arrow := p.pos
346 | 		p.next()
347 | 		y := p.parseRhs()
348 | 		return &ast.SendStmt{Chan: x[0], Arrow: arrow, Value: y}, false
349 | 
350 | 	case token.INC, token.DEC:
351 | 		// increment or decrement
352 | 		s := &ast.IncDecStmt{X: x[0], TokPos: p.pos, Tok: p.token}
353 | 		p.next()
354 | 		return s, false
355 | 	}
356 | 
357 | 	// expression
358 | 	return &ast.ExprStmt{X: x[0]}, false
359 | }
360 | 
361 | func (p *parser) parseReturnStmt() *ast.ReturnStmt {
362 | 	pos := p.pos
363 | 	p.expect(token.RETURN)
364 | 	var x []ast.Expr
365 | 	if p.token != token.SEMICOLON && p.token != token.RBRACE {
366 | 		x = exprList(p, true)
367 | 	}
368 | 	p.expectSemi()
369 | 
370 | 	return &ast.ReturnStmt{Return: pos, Results: x}
371 | }
372 | 
373 | func (p *parser) parseBranchStmt(tok token.Token) *ast.BranchStmt {
374 | 	pos := p.expect(tok)
375 | 	var label *ast.Name
376 | 	if tok != token.FALLTHROUGH && p.token == token.IDENT {
377 | 		label = p.name()
378 | 	}
379 | 	p.expectSemi()
380 | 
381 | 	return &ast.BranchStmt{TokPos: pos, Tok: tok, Label: label}
382 | }
383 | 
384 | func (p *parser) makeExpr(s ast.Stmt, want string) ast.Expr {
385 | 	if s == nil {
386 | 		return nil
387 | 	}
388 | 	if es, isExpr := s.(*ast.ExprStmt); isExpr {
389 | 		return es.X
390 | 	}
391 | 	found := "simple statement"
392 | 	if _, isAss := s.(*ast.AssignStmt); isAss {
393 | 		found = "assignment"
394 | 	}
395 | 	p.error(s.Position(), fmt.Sprintf("expected %s, found %s (missing parentheses around composite literal?)", want, found))
396 | 	return &ast.BadExpr{From: s.Position(), To: p.safePos(s.End())}
397 | }
398 | 
// parseIfHeader parses the part of an if statement between the `if` keyword
// and the opening "{": an optional initializer statement followed by the
// condition. cond is never nil on return; a missing condition is reported
// and replaced by an ast.BadExpr.
func (p *parser) parseIfHeader() (init ast.Stmt, cond ast.Expr) {
	if p.token == token.LBRACE {
		p.error(p.pos, "missing condition in if statement")
		cond = &ast.BadExpr{From: p.pos, To: p.pos}
		return
	}
	// p.token != token.LBRACE

	prevLev := p.exprLev // remember the expression level so it can be restored
	p.exprLev = -1

	if p.token != token.SEMICOLON { // initializer statement
		// accept potential variable declaration but complain
		if p.token == token.LET {
			p.next()
			p.error(p.pos, "var declaration not allowed in if initializer")
		}
		init, _ = p.parseSimpleStmt(basic)
	}

	var condStmt ast.Stmt // condition, still in statement form
	var semi struct {
		pos token.Pos
		lit string // ";" or "\n"; valid if pos.IsValid()
	}
	if p.token != token.LBRACE { // something other than "{" follows the first statement
		if p.token == token.SEMICOLON { // ;
			semi.pos = p.pos
			semi.lit = p.identifier
			p.next()
		} else {
			p.expect(token.SEMICOLON)
		}
		if p.token != token.LBRACE { // condition; absent in the form `if ; {}`
			condStmt, _ = p.parseSimpleStmt(basic)
		}
	} else {
		// Only one statement was present, so it is the condition rather
		// than an initializer.
		condStmt = init
		init = nil
	}

	if condStmt != nil {
		cond = p.makeExpr(condStmt, "boolean expression")
	} else if semi.pos.IsValid() {
		if semi.lit == "\n" {
			p.error(semi.pos, "unexpected newline, expecting { after if clause")
		} else {
			p.error(semi.pos, "missing condition in if statement")
		}
	}

	// make sure we have a valid AST
	if cond == nil {
		cond = &ast.BadExpr{From: p.pos, To: p.pos}
	}

	p.exprLev = prevLev
	return
}
458 | 
459 | func (p *parser) parseIfStmt() *ast.IfStmt {
460 | 	defer decNestLev(incNestLev(p))
461 | 
462 | 	pos := p.expect(token.IF)
463 | 
464 | 	init, cond := p.parseIfHeader()
465 | 	body := p.parseBody() // parseBlockStmt
466 | 
467 | 	var else_ ast.Stmt
468 | 	if p.token == token.ELSE {
469 | 		p.next()
470 | 		switch p.token {
471 | 		case token.IF:
472 | 			else_ = p.parseIfStmt()
473 | 		case token.LBRACE:
474 | 			else_ = p.parseBlockStmt()
475 | 			p.expectSemi()
476 | 		default:
477 | 			p.errorExpected(p.pos, "if statement or block")
478 | 			else_ = &ast.BadStmt{From: p.pos, To: p.pos}
479 | 		}
480 | 	} else {
481 | 		p.expectSemi()
482 | 	}
483 | 
484 | 	return &ast.IfStmt{If: pos, Init: init, Cond: cond, Body: body, Else: else_}
485 | }
486 | 


--------------------------------------------------------------------------------
/internal/os/elf/file.go:
--------------------------------------------------------------------------------
  1 | package elf
  2 | 
  3 | import (
  4 | 	"encoding/binary"
  5 | 	"fmt"
  6 | )
  7 | 
// Elf_Magic is the ELF identification array at the start of the file header
// (EI_NIDENT bytes). Its layout is the same for 32-bit and 64-bit files.
type Elf_Magic [EI_NIDENT]byte
 10 | 
 11 | // 32位 ELF 文件头结构
 12 | //type Header32 struct {
 13 | //	Magic      [4]byte // ELF 魔数 0x7F, 0x45, 0x4C, 0x46 - 对应ASCII码为 \x7FELF
 14 | //	Class      byte    // 文件类型 (32/64位)
 15 | //	Data       byte    // 字节序 0x01：小端序(Little Endian)，低字节在前 0x02：大端序(Big Endian)，高字节在前
 16 | //	Version    byte    // ELF 版本 通常为0x01，表示原始ELF格式规范版本
 17 | //	OSABI      byte    // 操作系统 ABI(0x00：System V 0x01：HP-UX 0x02：NetBSD 0x03：Linux 0x06：Solaris 0x09：FreeBSD 0x0C：OpenBSD)
 18 | //	ABIVersion byte    // ABI 版本(通常依赖于特定的ABI，对于System V通常为0x00)
 19 | //  第10-16个字节 (9-15)：填充字节 这些字节为保留字节，通常填充为0，保留供将来使用
 20 | //	Type       uint16  // 文件类型
 21 | //	Machine    uint16  // 机器类型
 22 | //	Entry      uint32  // 程序入口点
 23 | //	Phoff      uint32  // 程序头表偏移
 24 | //	Shoff      uint32  // 节头表偏移
 25 | //	Flags      uint32  // 处理器特定标志
 26 | //	Ehsize     uint16  // ELF 头大小
 27 | //	Phentsize  uint16  // 程序头表项大小
 28 | //	Phnum      uint16  // 程序头表项数量
 29 | //	Shentsize  uint16  // 节头表项大小
 30 | //	Shnum      uint16  // 节头表项数量
 31 | //	Shstrndx   uint16  // 节名字符串表索引
 32 | //}
 33 | 
 34 | func (m Elf_Magic) Bits() int {
 35 | 	return int(m[4])
 36 | }
 37 | 
 38 | func (m Elf_Magic) Endian() binary.ByteOrder {
 39 | 	if m[5] == 1 {
 40 | 		return binary.LittleEndian
 41 | 	} else if m[5] == 2 {
 42 | 		return binary.BigEndian
 43 | 	}
 44 | 	panic("不支持的字节序")
 45 | }
 46 | 
// Elf32_Phdr is one entry of the ELF32 program header table. The field
// notes follow the ELF specification's p_* members.
type Elf32_Phdr struct {
	Type   Elf32_Word // segment type (p_type), e.g. PT_LOAD
	Offset Elf32_Off  // offset of the segment in the file (p_offset)
	VAddr  Elf32_Addr // virtual address of the segment (p_vaddr)
	Paddr  Elf32_Addr // physical address (p_paddr); AddPhdr leaves it zero
	Filesz Elf32_Word // size of the segment in the file (p_filesz)
	Memsz  Elf32_Word // size of the segment in memory (p_memsz)
	Flags  Elf32_Word // permission flags (p_flags), built from PF_R / PF_W / PF_X
	Align  Elf32_Word // alignment (p_align)
}
 57 | 
// Elf32_Ehdr is the ELF32 file header. The number in parentheses is the
// size of each field in bytes; together they add up to the 52 bytes that
// NewElfFile stores in Ehsize.
type Elf32_Ehdr struct {
	Magic     Elf_Magic  // (16) magic number and identification bytes
	Type      Elf32_Half // (2) file type: 0 invalid, 1 relocatable, 2 executable, 3 shared object, 4 core dump
	Machine   Elf32_Half // (2) target architecture
	Version   Elf32_Word // (4) 0 or 1
	Entry     Elf32_Addr // (4) virtual address of the entry point (address-sized: 4 bytes in ELF32, 8 in ELF64)
	Phoff     Elf32_Off  // (4) file offset of the program header table (address-sized)
	Shoff     Elf32_Off  // (4) file offset of the section header table (address-sized)
	Flags     Elf32_Word // (4) processor-specific flags
	Ehsize    Elf32_Half // (2) size of this header
	Phentsize Elf32_Half // (2) size of one program header table entry
	Phnum     Elf32_Half // (2) number of program header table entries
	Shentsize Elf32_Half // (2) size of one section header table entry
	Shnum     Elf32_Half // (2) number of section header table entries
	Shstrndx  Elf32_Half // (2) section header index of the section name string table
}
 75 | 
// Elf32_Shdr is one entry of the ELF32 section header table.
type Elf32_Shdr struct {
	Name      Elf32_Word // section name: 4-byte offset into the section name string table (.shstrtab, itself a section located via Shstrndx)
	Type      Elf32_Word // section type: 1 program data (.text/.data), 2 symbol table (.symtab), 3 string table (.shstrtab), 8 no file contents (.bss), 9 relocation table (.rel.text/.rel.data)
	Flags     Elf32_Word // section flags: 0 default, 1 writable, 2 occupies memory once loaded, 4 executable
	Addr      Elf32_Addr // virtual address of the section; zero in relocatable files, computed by the linker for executables
	Offset    Elf32_Off  // offset of the section in the file
	Size      Elf32_Word // section size in bytes; for SHT_NOBITS there is no file data and this is the memory size after loading
	Link      Elf32_Word // link information, typically used by symbol table and relocation table sections
	Info      Elf32_Word // additional information
	Addralign Elf32_Word // alignment requirement
	Entsize   Elf32_Word // size of one entry for sections that hold a table
}
 89 | 
 90 | func NewShdr(Type SectionType, Flags SectionFlag, Offset, Size int) *Elf32_Shdr {
 91 | 	return &Elf32_Shdr{
 92 | 		Name:      0,
 93 | 		Type:      Elf32_Word(Type),
 94 | 		Flags:     Elf32_Word(Flags),
 95 | 		Addr:      0,
 96 | 		Offset:    Elf32_Off(Offset),
 97 | 		Size:      Elf32_Word(Size),
 98 | 		Link:      0,
 99 | 		Info:      0,
100 | 		Addralign: 4,
101 | 		Entsize:   0,
102 | 	}
103 | }
104 | 
// Elf32_Sym is one entry of the ELF32 symbol table.
type Elf32_Sym struct {
	Name  uint32 // symbol name
	Value uint32 // symbol value
	Size  uint32 // symbol size
	Info  byte   // symbol type and binding information
	Other byte   // reserved
	Shndx uint16 // index of the section the symbol belongs to
}
114 | 
// Elf32_Rel is one entry of an ELF32 relocation table. The field notes
// follow the ELF specification's r_offset / r_info members.
type Elf32_Rel struct {
	Offset uint32 // location to patch (r_offset)
	Info   uint32 // packed symbol index and relocation type (r_info)
}
120 | 
// Elf32_RelInfo pairs a relocation entry with the names needed to resolve
// it.
type Elf32_RelInfo struct {
	SegName string     // name of the section the relocation applies to
	Rel     *Elf32_Rel // relocation entry
	RelName string     // name of the referenced symbol
}
126 | 
// File is the in-memory representation of an ELF file. It holds the parts
// of the file that matter for processing it: headers, section and symbol
// tables, relocations and string tables.
type File struct {
	Ehdr         *Elf32_Ehdr            // ELF file header
	PhdrTab      []*Elf32_Phdr          // program header table
	ShdrTab      map[string]*Elf32_Shdr // section headers keyed by section name
	ShdrNames    []string               // section names in table order; the position is the section index (see GetSegIndex)
	SymTab       map[string]*Elf32_Sym  // symbols keyed by symbol name
	SymNames     []string               // symbol names in table order; the position is the symbol index (see GetSymIndex), needed when emitting relocations
	RelTab       []*Elf32_RelInfo       // relocation entries
	Name         string                 // file name
	Reader       BytesReader            // byte source that ReadData / ReadDataBy read from
	Shstrtab     []byte                 // contents of the section name string table
	ShstrtabSize int                    // length of the section name string table
	Strtab       []byte                 // contents of the string table
	StrtabSize   int                    // length of the string table
	ProgSegList  []*ProgSeg             // segments registered through AddProgSeg
}
144 | 
145 | func NewElfFile(magic Elf_Magic, eType, eMachine Elf32_Half) *File {
146 | 	file := &File{
147 | 		Ehdr: &Elf32_Ehdr{
148 | 			Magic:     magic,                  // 这个字段比较复杂
149 | 			Type:      eType,                  // 文件类型： 1表示可重定位, 2表示可执行 3表示共享目标 4 表示核心转储  0 表示无效
150 | 			Machine:   eMachine,               // 机器类型
151 | 			Version:   Elf32_Word(EV_CURRENT), // 文件版本 一般取1
152 | 			Entry:     0,                      // 程序入口的线性地址，一般用于可以执行文件， 可重定向文件该字段为 0
153 | 			Phoff:     0,                      // 程序头表在文件内的偏移地址， 标识了程序头表在文件内的位置
154 | 			Flags:     0,                      // 文件平台相关属性， 一般默认为 0 (x86 应该没用到)
155 | 			Ehsize:    52,                     // 文件头的大小 (跟系统位数有关 32位52字节 64位64字节)
156 | 			Phentsize: 0,                      // 程序头表项的大小
157 | 			Phnum:     0,                      // 程序头表项的个数，确定程序头表在文件[phoff: phoff + phentsize*phnum] 的数据块中
158 | 			Shentsize: 40,                     // 段表项的大小
159 | 			Shnum:     0,                      // 段表项的个数， 确定数据区块存在于 [shoff:shoff+shentsize*eshnum] 中
160 | 			Shstrndx:  0,                      // .shstrtab的索引
161 | 		},
162 | 		ShdrTab:     make(map[string]*Elf32_Shdr),
163 | 		ShdrNames:   make([]string, 0),
164 | 		SymTab:      make(map[string]*Elf32_Sym),
165 | 		SymNames:    make([]string, 0),
166 | 		RelTab:      make([]*Elf32_RelInfo, 0),
167 | 		Shstrtab:    make([]byte, 0),
168 | 		Strtab:      make([]byte, 0),
169 | 		ProgSegList: make([]*ProgSeg, 0),
170 | 	}
171 | 
172 | 	// 初始化ELF魔数
173 | 	//Ehdr.Magic[0] = 0x7F // DEL
174 | 	//Ehdr.Magic[1] = 'E'  // .
175 | 	//Ehdr.Magic[2] = 'L'  // .
176 | 	//Ehdr.Magic[3] = 'F'  // .
177 | 	//Ehdr.Magic[4] = 1    // Class 32位格式 64位(2) 0表示无效
178 | 	//Ehdr.Magic[5] = 1    // 小端序 大端(2) 0表示无效
179 | 	//Ehdr.Magic[6] = 1    // ELF版本 默认为1
180 | 	// 后面9字节在ELF标准中未定义， 一般用于平台相关的扩展标志
181 | 	// 第8字节 取0 表示 unix 系统
182 | 	// 第9字节 取0 表示系统 ABI 版本为 0
183 | 	// 其它字节默认为 0
184 | 
185 | 	// 添加空节表项(重定位文件和可执行文件都有)
186 | 	file.AddShdr("", &Elf32_Shdr{})
187 | 
188 | 	// 添加空符号表项
189 | 	file.AddSym("", nil)
190 | 
191 | 	return file
192 | }
193 | 
194 | func (e *File) Bits() int {
195 | 	return e.Ehdr.Magic.Bits()
196 | }
197 | 
198 | func (e *File) Endian() binary.ByteOrder { return e.Ehdr.Magic.Endian() }
199 | 
// AddShdr registers a section header under shName. The name is always
// appended to ShdrNames (which fixes the section's index); the header
// itself is only stored when it is non-nil.
func (e *File) AddShdr(shName string, shdr *Elf32_Shdr) {
	if shdr != nil {
		e.ShdrTab[shName] = shdr
	}
	e.ShdrNames = append(e.ShdrNames, shName)
}
206 | 
207 | // AddShdrSec sh_name和sh_offset都需要重新计算 todo
208 | func (e *File) AddShdrSec(section *Section, offset int) {
209 | 	if section.Name == ".text" {
210 | 		e.AddShdr(section.Name,
211 | 			NewShdr(SHT_PROGBITS, SHF_ALLOC|SHF_EXECINSTR, offset, section.Length),
212 | 		)
213 | 	} else if section.Name == ".data" {
214 | 		e.AddShdr(section.Name,
215 | 			NewShdr(SHT_PROGBITS, SHF_ALLOC|SHF_WRITE, offset, section.Length),
216 | 		)
217 | 	} else if section.Name == ".bss" { // 非必须
218 | 		// 关于 .bss 段： 用于存储未初始化的全局变量和静态变量
219 | 		// 特点：在程序价值时会被自动初始化为 0
220 | 		// 优势：节省可执行文件空间，只占用很少部分（通常只记录大小）
221 | 		// 场景：大小数组或缓冲区的申明， 未初始化的全局变量，未初始化的静态局部变量， 需要零初始化的数据结构
222 | 		// 语法: buffer: resw 1024 // 记录 Buffer 符号 需要 resw 宽度 * 1024 空间 (resw 等同于 dw)
223 | 		e.AddShdr(section.Name,
224 | 			NewShdr(SHT_NOBITS, SHF_ALLOC|SHF_WRITE, offset, section.Length),
225 | 		)
226 | 	}
227 | }
228 | 
229 | // AddPhdrRec 添加程序头表
230 | func (e *File) AddPhdr(t Elf32_Word, off Elf32_Off, vaddr Elf32_Addr, filesz, memsz, flags, align Elf32_Word) {
231 | 	ph := &Elf32_Phdr{
232 | 		Type:   t,
233 | 		Offset: off,
234 | 		VAddr:  vaddr,
235 | 		Filesz: filesz,
236 | 		Memsz:  memsz,
237 | 		Flags:  flags,
238 | 		Align:  align,
239 | 	}
240 | 	e.PhdrTab = append(e.PhdrTab, ph)
241 | }
242 | 
243 | // AddProgSeg 添加程序头表, 同时添加段表
244 | func (e *File) AddProgSeg(name string, seg *ProgSeg) {
245 | 	flags := PF_W | PF_R // 可读、可写
246 | 	filesz := seg.Size   // 占用磁盘大小（合并后的大小）
247 | 	if name == ".text" {
248 | 		flags = PF_X | PF_R //.text段可读可执行
249 | 	}
250 | 	if name == ".bss" {
251 | 		filesz = 0 // .bss段不占磁盘空间
252 | 	}
253 | 
254 | 	seg.Name = name
255 | 	e.ProgSegList = append(e.ProgSegList, seg)
256 | 	e.AddPhdr(Elf32_Word(PT_LOAD), seg.Offset, seg.BaseAddr,
257 | 		filesz, seg.Size, Elf32_Word(flags), MemAlign)
258 | 
259 | 	shType := SHT_PROGBITS
260 | 	shFlags := SHF_ALLOC | SHF_WRITE
261 | 	shAlign := 4 //4B
262 | 	if name == ".bss" {
263 | 		shType = SHT_NOBITS
264 | 	}
265 | 	if name == ".text" {
266 | 		shFlags = SHF_ALLOC | SHF_EXECINSTR
267 | 		shAlign = 16
268 | 	}
269 | 	// 添加程序头表也要添加对应的段
270 | 	//添加一个段表项，暂时按照4字节对齐
271 | 	shdr := NewShdr(shType, shFlags, int(seg.Offset), int(seg.Size))
272 | 	shdr.Addr = seg.BaseAddr
273 | 	shdr.Addralign = Elf32_Word(shAlign)
274 | 	e.AddShdr(name, shdr)
275 | }
276 | 
277 | func (e *File) AddSym(name string, sym *Elf32_Sym) {
278 | 	target := &Elf32_Sym{
279 | 		Name:  0,
280 | 		Value: 0,
281 | 		Size:  0,
282 | 		Info:  0,
283 | 		Other: 0,
284 | 		Shndx: 0,
285 | 	}
286 | 	if name != "" {
287 | 		target.Value = sym.Value
288 | 		target.Size = sym.Size
289 | 		target.Info = sym.Info
290 | 		target.Other = sym.Other
291 | 		target.Shndx = sym.Shndx
292 | 	}
293 | 	e.SymTab[name] = target
294 | 	e.SymNames = append(e.SymNames, name)
295 | }
296 | 
// AddRel appends info to the file's relocation table (e.RelTab).
func (e *File) AddRel(info *Elf32_RelInfo) {
	e.RelTab = append(e.RelTab, info)
}
300 | 
301 | func (e *File) GetSegIndex(seg string) int {
302 | 	for i, name := range e.ShdrNames {
303 | 		if name == seg {
304 | 			return i
305 | 		}
306 | 	}
307 | 	return -1
308 | }
309 | 
310 | func (e *File) GetSymIndex(sym string) int {
311 | 	for i, name := range e.SymNames {
312 | 		if name == sym {
313 | 			return i
314 | 		}
315 | 	}
316 | 	return -1
317 | }
318 | 
// ReadData returns what e.Reader.Data yields for the given offset and size,
// after converting both to int.
func (e *File) ReadData(offset Elf32_Off, size Elf32_Word) []byte {
	return e.Reader.Data(int(offset), int(size))
}
322 | 
323 | func (e *File) ReadDataBy(seg string) []byte {
324 | 	section := e.ShdrTab[seg]
325 | 	return e.Reader.Data(int(section.Offset), int(section.Size))
326 | }
327 | 
// WriteFile delegates to FileWrite with this file and the path target, and
// returns whatever error FileWrite reports.
func (e *File) WriteFile(target string) error {
	return FileWrite(e, target)
}
331 | 
332 | /*
333 | 	dir:输出目录
334 | 	flag:1-第一次写，文件头+PHT；2-第二次写，段表字符串表+段表+符号表+字符串表；
335 | */
336 | //void Elf_file::writeElf(const char*dir,int flag)
337 | //{
338 | //if(flag==1)
339 | //{
340 | //FILE*fp=fopen(dir,"w+");
341 | //fwrite(&ehdr,ehdr.e_ehsize,1,fp);//elf文件头
342 | //if(!phdrTab.empty())//程序头表
343 | //{
344 | //for(int i=0;i<phdrTab.size();++i)
345 | //fwrite(phdrTab[i],ehdr.e_phentsize,1,fp);
346 | //}
347 | //fclose(fp);
348 | //}
349 | //else if(flag==2)
350 | //{
351 | //FILE*fp=fopen(dir,"a+");
352 | //fwrite(shstrtab,shstrtabSize,1,fp);//.shstrtab
353 | //for(int i=0;i<shdrNames.size();++i)//段表
354 | //{
355 | //Elf32_Shdr*sh=shdrTab[shdrNames[i]];
356 | //fwrite(sh,ehdr.e_shentsize,1,fp);
357 | //}
358 | //for(int i=0;i<symNames.size();++i)//符号表
359 | //{
360 | //Elf32_Sym*sym=symTab[symNames[i]];
361 | //fwrite(sym,sizeof(Elf32_Sym),1,fp);
362 | //}
363 | //fwrite(strtab,strtabSize,1,fp);//.strtab
364 | //fclose(fp);
365 | //}
366 | //}
367 | 
368 | func (e *File) Objdump() {
369 | 	fmt.Printf("\nELF 文件头信息：\n")
370 | 	fmt.Printf("\t魔术信息：")
371 | 	for _, b := range e.Ehdr.Magic {
372 | 		fmt.Printf("%02x ", b) // %02x 保证两位数，不足补零
373 | 	}
374 | 	fmt.Printf("\n")
375 | 	fmt.Printf("\t文件类型：%d\n", e.Ehdr.Type)
376 | 	fmt.Printf("\t架构：%d\n", e.Ehdr.Machine)
377 | 	fmt.Printf("\t版本号：%d\n", e.Ehdr.Version)
378 | 	fmt.Printf("\t入口地址：0x%x(%d)\n", e.Ehdr.Entry, e.Ehdr.Entry)
379 | 	fmt.Printf("\t程序头表偏移地址：0x%x(%d)\n", e.Ehdr.Phoff, e.Ehdr.Phoff)
380 | 	fmt.Printf("\t段表偏移地址：0x%x(%d)\n", e.Ehdr.Shoff, e.Ehdr.Shoff)
381 | 	fmt.Printf("\tFlags 标志信息：%d\n", e.Ehdr.Flags)
382 | 	fmt.Printf("\t文件头大小：%d bytes\n", e.Ehdr.Ehsize)
383 | 	fmt.Printf("\t程序头表项大小：%d bytes\n", e.Ehdr.Phentsize)
384 | 	fmt.Printf("\t程序头表项数：%d\n", e.Ehdr.Phnum)
385 | 	fmt.Printf("\t段表项大小：%d bytes\n", e.Ehdr.Shentsize)
386 | 	fmt.Printf("\t段表项数：%d\n", e.Ehdr.Ehsize)
387 | 	fmt.Printf("\t节头字符串表索引：%d\n", e.Ehdr.Shstrndx)
388 | 
389 | 	//offset := int(e.Ehdr.Shoff)
390 | 	//shnum := int(e.Ehdr.Shnum)
391 | 	//shentsize := int(e.Ehdr.Shentsize)
392 | 	//fmt.Printf("\nELF 段表信息[开始：0x%x, 长度：0x%x]：\n", offset, shnum*shentsize)
393 | 	//w := tablewriter.NewWriter(os.Stdout)
394 | 	//w.SetAlignment(tablewriter.ALIGN_RIGHT)
395 | 	//w.SetHeader([]string{
396 | 	//	"开始地址", "序号", "名称", "类型", "标志", "地址", "位置偏移", "空间大小", "链接", "附加", "对齐", "表项大小",
397 | 	//})
398 | 	//for i, name := range e.ShdrNames {
399 | 	//	e := e.ShdrTab[name]
400 | 	//	w.Append([]string{
401 | 	//		fmt.Sprintf("0x%x", offset+i*shentsize),
402 | 	//		fmt.Sprintf("[%d]", i),
403 | 	//		name,
404 | 	//		SectionTypeName(e.Type),
405 | 	//		SectionFlagName(uint64(e.Flags)),
406 | 	//		fmt.Sprintf("0x%x", e.Addr),
407 | 	//		fmt.Sprintf("0x%x", e.Offset),
408 | 	//		fmt.Sprintf("%d bytes", e.Size),
409 | 	//		fmt.Sprintf("%d", e.Link),
410 | 	//		fmt.Sprintf("%d", e.Info),
411 | 	//		fmt.Sprintf("%d", e.Addralign),
412 | 	//		fmt.Sprintf("%d bytes", e.Entsize),
413 | 	//	})
414 | 	//}
415 | 	//w.Render()
416 | 	//
417 | 	//// 打印符号表【段】
418 | 	//symTabInfo := e.ShdrTab[".symtab"]
419 | 	//offset = int(symTabInfo.Offset)
420 | 	//fmt.Printf("\nELF 符号表信息[开始：0x%x, 长度：0x%x]：\n", offset, symTabInfo.Size)
421 | 	//w = tablewriter.NewWriter(os.Stdout)
422 | 	//w.SetHeader([]string{
423 | 	//	"开始地址", "序号", "名称", "地址", "空间大小", "类型和绑定", "其它", "所在节",
424 | 	//})
425 | 	//for i, name := range e.SymNames {
426 | 	//	sym := e.SymTab[name]
427 | 	//	segment := fmt.Sprintf("%d", sym.Shndx)
428 | 	//	if sym.Shndx > 0 {
429 | 	//		sh := e.ShdrNames[sym.Shndx]
430 | 	//		segment = fmt.Sprintf("%s,%s", segment, sh)
431 | 	//	}
432 | 	//	w.Append([]string{
433 | 	//		fmt.Sprintf("0x%x", offset+i*16),
434 | 	//		fmt.Sprintf("[%d]", i),
435 | 	//		name,
436 | 	//		fmt.Sprintf("0x%x", sym.Value),
437 | 	//		fmt.Sprintf("%d bytes", sym.Size),
438 | 	//		fmt.Sprintf("0b%b", sym.Info),
439 | 	//		fmt.Sprintf("0b%b", sym.Other),
440 | 	//		segment,
441 | 	//	})
442 | 	//}
443 | 	//w.Render()
444 | 
445 | 	// 打印重定位表【段】
446 | 
447 | 	// todo 循环遍历，依次打印段信息
448 | 
449 | }
450 | 
451 | //// GetData GetSectionData 获取节数据
452 | //func (f *ElfFile) GetData(seg *Elf32_Shdr) ([]byte, error) {
453 | //	offset := uint64(seg.Offset)
454 | //	size := uint64(seg.Size)
455 | //	os.Open() // 读取数据
456 | //
457 | //	data := make([]byte, size)
458 | //	if _, err := f.FileHandle.Seek(int64(offset), 0); err != nil {
459 | //		fmt.Printf("[DEBUG] 错误: 定位到节偏移失败: %v\n", err)
460 | //		return nil, err
461 | //	}
462 | //	if _, err := io.ReadFull(f.FileHandle, data); err != nil {
463 | //		fmt.Printf("[DEBUG] 错误: 读取节数据失败: %v\n", err)
464 | //		return nil, err
465 | //	}
466 | //	fmt.Printf("[DEBUG] 成功读取节数据, 大小: %d\n", len(data))
467 | //	return data, nil
468 | //}
469 | 


--------------------------------------------------------------------------------
/compiler/compile/parser/types.go:
--------------------------------------------------------------------------------
  1 | package parser
  2 | 
  3 | import (
  4 | 	"github.com/facelang/face/compiler/compile/ast"
  5 | 	"github.com/facelang/face/compiler/compile/tokens"
  6 | 	"github.com/facelang/face/internal/prog"
  7 | 	"go/token"
  8 | )
  9 | 
 10 | // NewIndirect 指针类型 todo, 暂时忽略
 11 | func NewIndirect(pos prog.FilePos, typ prog.Expr) prog.Expr {
 12 | 	o := new(prog.Operation)
 13 | 	o.pos = pos
 14 | 	o.Op = Mul
 15 | 	o.X = typ
 16 | 	return o
 17 | }
 18 | 
 19 | // FuncType If context != "", type parameters are not permitted.
 20 | func FuncType(p *parser, context string) ([]*prog.Field, *prog.FuncType) {
 21 | 
 22 | 	typ := new(prog.FuncType)
 23 | 	typ.pos = p.FilePos
 24 | 
 25 | 	var tparamList []*prog.Field
 26 | 	// 目标语法使用 尖括号
 27 | 	//if p.got(api.LBRACK) { // [] 泛型 func [] name(args)
 28 | 	//	if context != "" {
 29 | 	//		// accept but complain
 30 | 	//		p.syntaxErrorAt(typ.pos, context+" must have no type parameters")
 31 | 	//	}
 32 | 	//	if p.tok == _Rbrack {
 33 | 	//		p.syntaxError("empty type parameter list")
 34 | 	//		p.next()
 35 | 	//	} else {
 36 | 	//		tparamList = p.paramList(nil, nil, _Rbrack, true)
 37 | 	//	}
 38 | 	//}
 39 | 
 40 | 	p.want(LPAREN)
 41 | 	typ.ParamList = p.paramList(nil, nil, _Rparen, false)
 42 | 	typ.ResultList = p.funcResult()
 43 | 
 44 | 	return tparamList, typ
 45 | }
 46 | 
 47 | // TypeOrNil is like type_ but it returns nil if there was no type
 48 | // instead of reporting an error.
 49 | //
 50 | //	Type     = TypeName | TypeLit | "(" Type ")" .
 51 | //	TypeName = identifier | QualifiedIdent .
 52 | //	TypeLit  = ArrayType | StructType | PointerType | FunctionType | InterfaceType |
 53 | //		      SliceType | MapType | Channel_Type .
 54 | func TypeOrNil(p *parser) prog.Expr {
 55 | 	//defer decNestLev(incNestLev(p)) // 递归统计，避免递归太深
 56 | 	switch p.token {
 57 | 	case token.IDENT:
 58 | 		typ := p.parseTypeName(nil)
 59 | 		if p.tok == token.LBRACK {
 60 | 			typ = p.parseTypeInstance(typ)
 61 | 		}
 62 | 		return typ
 63 | 	case LBRACK:
 64 | 		lbrack := p.expect(LBRACK)
 65 | 		return p.parseArrayType(lbrack, nil)
 66 | 	case STRUCT:
 67 | 		return p.parseStructType()
 68 | 	case MUL:
 69 | 		return p.parsePointerType()
 70 | 	case FUNC:
 71 | 		return p.parseFuncType()
 72 | 	case INTERFACE:
 73 | 		return p.parseInterfaceType()
 74 | 	case MAP:
 75 | 		return p.parseMapType()
 76 | 	case CHAN, ARROW:
 77 | 		return p.parseChanType()
 78 | 	case LPAREN:
 79 | 		lparen := p.pos
 80 | 		p.next()
 81 | 		typ := p.parseType()
 82 | 		rparen := p.expect(RPAREN)
 83 | 		return &ast.ParenExpr{Lparen: lparen, X: typ, Rparen: rparen}
 84 | 	}
 85 | 
 86 | 	// no type found
 87 | 	return nil
 88 | }
 89 | 
 90 | func RequireType(p *parser) prog.Expr {
 91 | 	typ := TypeOrNil(p)
 92 | 	if typ == nil {
 93 | 		p.unexpect("type")
 94 | 	}
 95 | 	return typ
 96 | }
 97 | 
 98 | /**
 99 | 函数类型: let v1 func(string)
100 | 函数类型: let v2 (string) => string
101 | 数组类型: let v3 array<int>
102 | 字典类型: let v4 map<string, string>
103 | 基本数据类型: let v5 int [string, float]
104 | 其它自定义类型: let v6 http.Http [或其它类型别名]
105 | */
106 | 
107 | func (p *parser) parseTypeInstance(typ ast.Expr) ast.Expr {
108 | 	opening := p.expect(token.LBRACK) // [
109 | 	//p.exprLev++
110 | 	var list []ast.Expr
111 | 	for p.token != token.RBRACK && p.token != token.EOF {
112 | 		list = append(list, p.parseType())
113 | 		if p.token != token.COMMA {
114 | 			break
115 | 		}
116 | 		p.next()
117 | 	}
118 | 	//p.exprLev--
119 | 
120 | 	closing := p.expect(token.RBRACK) // ]
121 | 
122 | 	if len(list) == 0 {
123 | 		p.unexpect("type argument list")
124 | 		return &ast.IndexExpr{
125 | 			X:      typ,
126 | 			Lbrack: opening,
127 | 			Index:  &ast.BadExpr{From: opening + 1, To: closing},
128 | 			Rbrack: closing,
129 | 		}
130 | 	}
131 | 
132 | 	return packIndexExpr(typ, opening, list, closing)
133 | }
134 | 
135 | // If the result is an identifier, it is not resolved.
136 | func (p *parser) parseTypeName(ident *ast.Name) ast.Expr {
137 | 	if ident == nil {
138 | 		ident = p.name()
139 | 	}
140 | 
141 | 	if p.token == token.PERIOD {
142 | 		p.next()
143 | 		sel := p.name()
144 | 		return &ast.SelectorExpr{X: ident, Sel: sel}
145 | 	}
146 | 
147 | 	return ident
148 | }
149 | 
150 | // "[" has already been consumed, and lbrack is its position.
151 | // If len != nil it is the already consumed array length.
152 | func (p *parser) parseArrayType(lbrack token.Pos, len ast.Expr) *ast.ArrayType {
153 | 
154 | 	if len == nil { // 没有解析 [x] 中间的参数
155 | 		//p.exprLev++
156 | 		// always permit ellipsis for more fault-tolerant parsing
157 | 		if p.token == token.ELLIPSIS { // [...]
158 | 			len = &ast.Ellipsis{Ellipsis: p.pos}
159 | 			p.next()
160 | 		} else if p.token != token.RBRACK { // [len]
161 | 			len = exprRhs(p)
162 | 		}
163 | 		// len 可能为 nil
164 | 		//p.exprLev--
165 | 	}
166 | 	if p.token == token.COMMA { // , 不应该出现
167 | 		// Trailing commas are accepted in type parameter
168 | 		// lists but not in array type declarations.
169 | 		// Accept for better error handling but complain.
170 | 		p.error(p.pos, "unexpected comma; expecting ]")
171 | 		p.next()
172 | 	}
173 | 	p.expect(token.RBRACK) // ] 结束符
174 | 	elt := p.parseType()   // 可能是多维数组
175 | 	return &ast.ArrayType{Lbrack: lbrack, Len: len, Elt: elt}
176 | }
177 | 
178 | func (p *parser) parseMapType() *ast.MapType {
179 | 	pos := p.expect(token.MAP) // map
180 | 	p.expect(token.LBRACK)     // [
181 | 	key := p.parseType()       // keyType
182 | 	p.expect(token.RBRACK)     // ]
183 | 	value := p.parseType()     // valType
184 | 
185 | 	return &ast.MapType{Map: pos, Key: key, Value: value}
186 | }
187 | 
188 | func (p *parser) parseQualifiedIdent(ident *ast.Name) ast.Expr {
189 | 
190 | 	typ := p.parseTypeName(ident)
191 | 	if p.token == token.LBRACK {
192 | 		typ = p.parseTypeInstance(typ)
193 | 	}
194 | 
195 | 	return typ
196 | }
197 | 
198 | func (p *parser) parseArrayFieldOrTypeInstance(x *ast.Name) (*ast.Name, ast.Expr) {
199 | 	lbrack := p.expect(token.LBRACK)
200 | 	trailingComma := token.NoPos // if valid, the position of a trailing comma preceding the ']'
201 | 	var args []ast.Expr
202 | 	if p.token != token.RBRACK {
203 | 		//p.exprLev++
204 | 		args = append(args, exprRhs(p))
205 | 		for p.token == token.COMMA {
206 | 			comma := p.pos
207 | 			p.next()
208 | 			if p.token == token.RBRACK {
209 | 				trailingComma = comma
210 | 				break
211 | 			}
212 | 			args = append(args, exprRhs(p))
213 | 		}
214 | 		//p.exprLev--
215 | 	}
216 | 	rbrack := p.expect(token.RBRACK)
217 | 
218 | 	if len(args) == 0 {
219 | 		// x []E
220 | 		elt := p.parseType()
221 | 		return x, &ast.ArrayType{Lbrack: lbrack, Elt: elt}
222 | 	}
223 | 
224 | 	// x [P]E or x[P]
225 | 	if len(args) == 1 {
226 | 		elt := p.tryIdentOrType()
227 | 		if elt != nil {
228 | 			// x [P]E
229 | 			if trailingComma.IsValid() {
230 | 				// Trailing commas are invalid in array type fields.
231 | 				p.error(trailingComma, "unexpected comma; expecting ]")
232 | 			}
233 | 			return x, &ast.ArrayType{Lbrack: lbrack, Len: args[0], Elt: elt}
234 | 		}
235 | 	}
236 | 
237 | 	// x[P], x[P1, P2], ...
238 | 	return nil, packIndexExpr(x, lbrack, args, rbrack)
239 | }
240 | 
// parseFieldDecl parses one field declaration. It is used only inside struct
// types.
//
// A field is either a list of names followed by a type ("a, b T"), or an
// embedded type (T, pkg.T, *T). Parenthesized embedded types are reported as
// errors but still parsed. An optional string literal after the field is
// recorded as its tag.
func (p *parser) parseFieldDecl() *ast.Field {

	//doc := p.leadComment

	var names []*ast.Name
	var typ ast.Expr
	switch p.token {
	case token.IDENT: // parse the field name first
		name := p.name()
		if p.token == token.PERIOD || p.token == token.STRING || p.token == token.SEMICOLON || p.token == token.RBRACE {
			// embedded type
			// the name is followed by one of:   . "" ; }
			typ = name
			if p.token == token.PERIOD {
				typ = p.parseQualifiedIdent(name)
			}
		} else { // any other token
			// name1, name2, ... T
			names = []*ast.Name{name}
			for p.token == token.COMMA { // struct { a, b, c int }
				p.next()
				names = append(names, p.name())
			}
			// Careful dance: We don't know if we have an embedded instantiated
			// type T[P1, P2, ...] or a field T of array type []E or [P]E.
			// { a }
			if len(names) == 1 && p.token == token.LBRACK {
				name, typ = p.parseArrayFieldOrTypeInstance(name) // todo
				if name == nil {
					// A nil name means a type instance was parsed, so this
					// is an embedded type and has no field names.
					names = nil
				}
			} else {
				// T P
				typ = p.parseType()
			}
		}
	case token.MUL:
		star := p.pos
		p.next()
		if p.token == token.LPAREN {
			// *(T)
			p.error(p.pos, "cannot parenthesize embedded type")
			p.next()
			typ = p.parseQualifiedIdent(nil)
			// expect closing ')' but no need to complain if missing
			if p.token == token.RPAREN {
				p.next()
			}
		} else {
			// *T
			typ = p.parseQualifiedIdent(nil)
		}
		typ = &ast.StarExpr{Star: star, X: typ}

	case token.LPAREN:
		p.error(p.pos, "cannot parenthesize embedded type")
		p.next()
		if p.token == token.MUL {
			// (*T)
			star := p.pos
			p.next()
			typ = &ast.StarExpr{Star: star, X: p.parseQualifiedIdent(nil)}
		} else {
			// (T)
			typ = p.parseQualifiedIdent(nil)
		}
		// expect closing ')' but no need to complain if missing
		if p.token == token.RPAREN {
			p.next()
		}

	default:
		pos := p.pos
		p.unexpect("field name or embedded type")
		typ = &ast.BadExpr{From: pos, To: p.pos}
	}

	// Optional field tag.
	var tag *ast.BasicLit
	if p.token == token.STRING {
		tag = &ast.BasicLit{Pos: p.pos, Kind: p.token, Value: p.identifier}
		p.next()
	}

	field := &ast.Field{Names: names, Type: typ, Tag: tag}
	return field
}
328 | 
329 | func (p *parser) parseStructType() *ast.StructType {
330 | 	pos := p.expect(token.STRUCT) // struct {}
331 | 	lbrace := p.expect(token.LBRACE)
332 | 	var list []*ast.Field
333 | 	for p.token == token.IDENT || p.token == token.MUL || p.token == token.LPAREN {
334 | 		// a field declaration cannot start with a '(' but we accept
335 | 		// it here for more robust parsing and better error messages
336 | 		// (parseFieldDecl will check and complain if necessary)
337 | 		list = append(list, p.parseFieldDecl())
338 | 	}
339 | 	rbrace := p.expect(token.RBRACE)
340 | 
341 | 	return &ast.StructType{
342 | 		Struct: pos,
343 | 		Fields: &ast.FieldList{
344 | 			Opening: lbrace,
345 | 			List:    list,
346 | 			Closing: rbrace,
347 | 		},
348 | 	}
349 | }
350 | 
351 | func (p *parser) parsePointerType() *ast.StarExpr {
352 | 	star := p.expect(token.MUL)
353 | 	base := p.parseType()
354 | 
355 | 	return &ast.StarExpr{Star: star, X: base}
356 | }
357 | 
358 | func (p *parser) parseMethodSpec() *ast.Field {
359 | 	var idents []*ast.Name
360 | 	var typ ast.Expr
361 | 	x := p.parseTypeName(nil)
362 | 	if ident, _ := x.(*ast.Name); ident != nil {
363 | 		switch {
364 | 		case p.token == token.LBRACK:
365 | 			// generic method or embedded instantiated type
366 | 			lbrack := p.pos
367 | 			p.next()
368 | 			//p.exprLev++
369 | 			x := expr(p)
370 | 			//p.exprLev--
371 | 			if name0, _ := x.(*ast.Name); name0 != nil && p.token != token.COMMA && p.token != token.RBRACK {
372 | 				// generic method m[T any]
373 | 				//
374 | 				// Interface methods do not have type parameters. We parse them for a
375 | 				// better error message and improved error recovery.
376 | 				_ = p.parseParameterList(name0, nil, token.RBRACK)
377 | 				_ = p.expect(token.RBRACK)
378 | 				p.error(lbrack, "interface method must have no type parameters")
379 | 
380 | 				// TODO(rfindley) refactor to share code with parseFuncType.
381 | 				_, params := p.parseParameters(false)
382 | 				results := p.parseResult()
383 | 				idents = []*ast.Name{ident}
384 | 				typ = &ast.FuncType{
385 | 					Func:    token.NoPos,
386 | 					Params:  params,
387 | 					Results: results,
388 | 				}
389 | 			} else {
390 | 				// embedded instantiated type
391 | 				// TODO(rfindley) should resolve all identifiers in x.
392 | 				list := []ast.Expr{x}
393 | 				if p.token == token.COMMA {
394 | 					//p.exprLev++
395 | 					p.next()
396 | 					for p.token != token.RBRACK && p.token != token.EOF {
397 | 						list = append(list, p.parseType())
398 | 						if p.token != token.COMMA {
399 | 							break
400 | 						}
401 | 						p.next()
402 | 					}
403 | 					//p.exprLev--
404 | 				}
405 | 				rbrack := p.expectClosing(token.RBRACK, "type argument list")
406 | 				typ = packIndexExpr(ident, lbrack, list, rbrack)
407 | 			}
408 | 		case p.token == token.LPAREN:
409 | 			// ordinary method
410 | 			// TODO(rfindley) refactor to share code with parseFuncType.
411 | 			_, params := p.parseParameters(false)
412 | 			results := p.parseResult()
413 | 			idents = []*ast.Ident{ident}
414 | 			typ = &ast.FuncType{Func: token.NoPos, Params: params, Results: results}
415 | 		default:
416 | 			// embedded type
417 | 			typ = x
418 | 		}
419 | 	} else {
420 | 		// embedded, possibly instantiated type
421 | 		typ = x
422 | 		if p.token == token.LBRACK {
423 | 			// embedded instantiated interface
424 | 			typ = p.parseTypeInstance(typ)
425 | 		}
426 | 	}
427 | 
428 | 	return &ast.Field{Names: idents, Type: typ}
429 | }
430 | 
431 | func (p *parser) embeddedElem(x ast.Expr) ast.Expr {
432 | 	if x == nil {
433 | 		x = p.embeddedTerm()
434 | 	}
435 | 	for p.token == token.OR {
436 | 		t := new(ast.BinaryExpr)
437 | 		t.OpPos = p.pos
438 | 		t.Op = token.OR
439 | 		p.next()
440 | 		t.X = x
441 | 		t.Y = p.embeddedTerm()
442 | 		x = t
443 | 	}
444 | 	return x
445 | }
446 | 
447 | func (p *parser) embeddedTerm() ast.Expr {
448 | 	if p.token == token.TILDE {
449 | 		t := new(ast.UnaryExpr)
450 | 		t.OpPos = p.pos
451 | 		t.Op = token.TILDE
452 | 		p.next()
453 | 		t.X = p.parseType()
454 | 		return t
455 | 	}
456 | 
457 | 	t := p.tryIdentOrType()
458 | 	if t == nil {
459 | 		pos := p.pos
460 | 		p.unexpect("~ term or type")
461 | 		return &ast.BadExpr{From: pos, To: p.pos}
462 | 	}
463 | 
464 | 	return t
465 | }
466 | 
467 | func (p *parser) parseInterfaceType() *ast.InterfaceType {
468 | 	pos := p.expect(token.INTERFACE) // interface {}
469 | 	lbrace := p.expect(token.LBRACE)
470 | 
471 | 	var list []*ast.Field
472 | 
473 | parseElements:
474 | 	for {
475 | 		switch {
476 | 		case p.token == token.IDENT: // 只能声明函数
477 | 			f := p.parseMethodSpec()
478 | 			if f.Names == nil {
479 | 				f.Type = p.embeddedElem(f.Type)
480 | 			}
481 | 			f.Comment = p.expectSemi()
482 | 			list = append(list, f)
483 | 		case p.token == token.TILDE:
484 | 			typ := p.embeddedElem(nil)
485 | 			comment := p.expectSemi()
486 | 			list = append(list, &ast.Field{Type: typ, Comment: comment})
487 | 		default:
488 | 			if t := p.tryIdentOrType(); t != nil {
489 | 				typ := p.embeddedElem(t)
490 | 				comment := p.expectSemi()
491 | 				list = append(list, &ast.Field{Type: typ, Comment: comment})
492 | 			} else {
493 | 				break parseElements
494 | 			}
495 | 		}
496 | 	}
497 | 
498 | 	// TODO(rfindley): the error produced here could be improved, since we could
499 | 	// accept an identifier, 'type', or a '}' at this point.
500 | 	rbrace := p.expect(token.RBRACE)
501 | 
502 | 	return &ast.InterfaceType{
503 | 		Interface: pos,
504 | 		Methods: &ast.FieldList{
505 | 			Opening: lbrace,
506 | 			List:    list,
507 | 			Closing: rbrace,
508 | 		},
509 | 	}
510 | }
511 | 
512 | func (p *parser) tryIdentOrType() ast.Expr {
513 | 	defer decNestLev(incNestLev(p))
514 | 
515 | 	switch p.token {
516 | 	case token.IDENT:
517 | 		typ := p.parseTypeName(nil)  // 可能是 x.name(包名) 或者 x
518 | 		if p.token == token.LBRACK { // x[]
519 | 			typ = p.parseTypeInstance(typ) // todo
520 | 		}
521 | 		return typ
522 | 	case token.LBRACK:
523 | 		lbrack := p.expect(token.LBRACK) // n[]
524 | 		return p.parseArrayType(lbrack, nil)
525 | 	case token.STRUCT:
526 | 		return p.parseStructType()
527 | 	case token.MUL:
528 | 		return p.parsePointerType()
529 | 	case token.FUNC:
530 | 		return p.parseFuncType()
531 | 	case token.INTERFACE:
532 | 		return p.parseInterfaceType()
533 | 	case token.MAP:
534 | 		return p.parseMapType()
535 | 	//case tokens.CHAN, tokens.ARROW:
536 | 	//	return p.parseChanType()
537 | 	case token.LPAREN: // (
538 | 		lparen := p.pos
539 | 		p.next()
540 | 		typ := p.parseType()
541 | 		rparen := p.expect(token.RPAREN)
542 | 		return &ast.ParenExpr{Lparen: lparen, X: typ, Rparen: rparen}
543 | 	}
544 | 
545 | 	// no type found
546 | 	return nil
547 | }
548 | 
549 | func (p *parser) parseType() ast.Expr {
550 | 	typ := p.tryIdentOrType()
551 | 
552 | 	if typ == nil {
553 | 		pos := p.pos
554 | 		p.unexpect("type")
555 | 		return &ast.BadExpr{From: pos, To: p.pos}
556 | 	}
557 | 
558 | 	return typ
559 | }
560 | 


--------------------------------------------------------------------------------
/compiler/assemble/internal/parser.go:
--------------------------------------------------------------------------------
  1 | package internal
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"fmt"
  6 | 	"github.com/facelang/face/compiler/compile/token"
  7 | 	"github.com/facelang/face/internal/os/elf"
  8 | 	"github.com/facelang/face/internal/utils"
  9 | 	"go/ast"
 10 | 	"io"
 11 | 	"os"
 12 | 	"sort"
 13 | 	"strconv"
 14 | 	"strings"
 15 | 	"text/scanner"
 16 | )
 17 | 
// Relocation type constants.
const (
	R_386_32   = 1 // absolute addressing
	R_386_PC32 = 2 // PC-relative addressing
)
 23 | 
 24 | type section struct {
 25 | 	Name           string
 26 | 	Offset, Length int
 27 | }
 28 | 
// relocate is one entry of the relocation table built while parsing.
type relocate struct {
	Label   string // name of the symbol to relocate
	Type    int    // relocation type, e.g. R_386_32 (1) or R_386_PC32 (2)
	Offset  int    // offset of the location to relocate
	Section string // section the relocation targets
}
 35 | 
// parser holds the assembler's parsing state: the embedded lexer, the current
// token, the last error, and the tables filled in while reading the source.
type parser struct {
	*lexer                      // lexer
	token        Token          // current token kind
	error        error          // error information
	declList     []*ast.GenDecl // list of statements
	sec          *section       // current section
	secList      []*section     // list of all sections
	instrList    []*instr       // instruction list
	labelList    []*label       // symbol table
	labelNames   map[string]int // symbol table, keyed by name
	relocateList []*relocate    // relocation table

	//lineNum       int   // Line number in source file.
	//errorLine     int   // Line number of last error.
	//errorCount    int   // Number of errors.
	//sawCode       bool  // saw code in this file (as opposed to comments and blank lines)
	//pc            int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA.
	//input         []lex.Token
	//inputPos      int
	//pendingLabels []string // Labels to attach to next instruction.
	//labels        map[string]*obj.Prog
	//toPatch       []Patch
	//addr          []obj.Addr
	//ctxt          *obj.Link
	//firstProg     *obj.Prog
	//lastProg      *obj.Prog
	//dataAddr      map[string]int64 // Most recent address for DATA for this symbol.
	//isJump        bool             // Instruction being assembled is a jump.
	//allowABI      bool             // Whether ABI selectors are allowed.
	//pkgPrefix     string           // Prefix to add to local symbols.
	//errorWriter   io.Writer
}
 68 | 
 69 | //func (p *Parser) prefix() (string, bool) {
 70 | //	var token tokens.Token
 71 | //	for {
 72 | //		token = p.lex.NextToken()
 73 | //		if token == tokens.EOF {
 74 | //			return "", false
 75 | //		}
 76 | //		if token != tokens.COMMENT {
 77 | //			break
 78 | //		}
 79 | //	}
 80 | //
 81 | //	if token == tokens.IDENT {
 82 | //		panic(fmt.Errorf("unexpected token %s", "IDENT"))
 83 | //	}
 84 | //
 85 | //	return p.lex.ident, true
 86 | //}
 87 | 
 88 | func (p *parser) _addRel(label string, relType int) {
 89 | 	p.relocateList = append(
 90 | 		p.relocateList,
 91 | 		&relocate{
 92 | 			Label:   label,        // 重定位符号的名称
 93 | 			Type:    relType,      // 重定位类型0-R_386_32；1-R_386_PC32
 94 | 			Offset:  p.sec.Offset, // 重定位位置的偏移
 95 | 			Section: p.sec.Name,   // 重定位目标段
 96 | 		},
 97 | 	)
 98 | }
 99 | 
100 | // 段落切换
101 | func (p *parser) _switch(id string) {
102 | 	p.secList = append(
103 | 		p.secList,
104 | 		&section{
105 | 			Name:   p.sec.Name,
106 | 			Length: p.sec.Offset, // 结束位置，也代表大小, 先不记录偏移
107 | 		},
108 | 	)
109 | 
110 | 	p.sec.Name = id  // 切换到下一个段
111 | 	p.sec.Offset = 0 // 清0段偏移
112 | }
113 | 
114 | // ----------------------------------------------------------------------------------
115 | // -- parser start
116 | 
117 | func (p *parser) errorf(format string, args ...interface{}) {
118 | 	p.error = fmt.Errorf(format, args...)
119 | 	panic(p.error)
120 | }
121 | 
122 | func (p *parser) next() {
123 | 	p.token = p.lexer.NextToken()
124 | 	for p.token == COMMENT {
125 | 		p.token = p.lexer.NextToken()
126 | 	}
127 | }
128 | 
129 | func (p *parser) got(token Token) bool {
130 | 	if p.token == token {
131 | 		p.next()
132 | 		return true
133 | 	}
134 | 	return false
135 | }
136 | 
137 | func (p *parser) expect(tokens ...Token) Pos {
138 | 	pos := p.pos
139 | 	for _, tok := range tokens {
140 | 		if p.token == tok {
141 | 			p.next()
142 | 			return pos
143 | 		}
144 | 	}
145 | 
146 | 	p.unexpect(tokens[0].String())
147 | 	return pos
148 | }
149 | 
150 | func (p *parser) unexpect(except string) {
151 | 	found := token.TokenLabel(p.token, p.id)
152 | 	p.errorf("except %s, found %s", except, found)
153 | }
154 | 
155 | // defineType 处理数据定义, 同时计算符号长度
156 | func (p *parser) data(cont *[]int64, contLen *int64) {
157 | 	switch p.token {
158 | 	case IDENT: // 引用变量，变量必须已经被申明， 如果符号未定义，则记录重定位
159 | 		lb := p.GetLabel(p.id)
160 | 		if lb.Type == EQU_LABEL || lb.Type == LOCAL_LABEL {
161 | 			(*cont)[*contLen] = lb.Addr
162 | 		} else { // 未定义或非法符号, equ 做了单独处理！
163 | 			p._addRel(p.id, R_386_32)
164 | 		}
165 | 		*contLen++
166 | 		p.next()
167 | 	case INT:
168 | 		(*cont)[*contLen] = utils.IntBytes(p.id)
169 | 		*contLen++
170 | 		p.next()
171 | 	case FLOAT:
172 | 		(*cont)[*contLen] = utils.FloatBytes(p.id)
173 | 		*contLen++
174 | 		p.next()
175 | 	case STRING:
176 | 		for _, ch := range []byte(p.id) {
177 | 			(*cont)[*contLen] = int64(ch)
178 | 			*contLen++
179 | 		}
180 | 		p.next()
181 | 	default:
182 | 		// todo
183 | 		//p.errorf("[valType](%d,%d): %s, %，数据类型获取异常！", p.token.Message(p.id))
184 | 	}
185 | }
186 | 
// define creates a local label for a data definition named id, with the given
// repeat count (times) and element size, reads its value list and registers
// the label in the symbol table.
//
// NOTE(review): lb.Cont is allocated as []int while data() takes *[]int64 —
// confirm the intended element type.
// NOTE(review): data() advances the parser with p.next(), while the separator
// loop here reads tokens directly with p.NextToken() — confirm the two do not
// skip or double-read tokens.
func (p *parser) define(id string, times, size int) {
	lb := NewLabel(LOCAL_LABEL)
	lb.Times = times
	lb.Size = size
	lb.Cont = make([]int, 255) // data buffer
	lb.ContLen = 0
	p.data(&lb.Cont, &lb.ContLen) // reads one value: a number, a string or a referenced name

	// Check for a continued definition, e.g.: "hello world", 13, 10
	token := p.NextToken()
	for token == COMMA {
		p.data(&lb.Cont, &lb.ContLen) // reads one value: a number, a string or a referenced name
		token = p.NextToken()
	}

	p.ProcTable.AddLabel(id, lb)
}
204 | 
205 | // 以符号名称开始的语句， 数据定义，或代码段标记
206 | func (p *parser) labelDec(id string) {
207 | 	p.next()
208 | 	switch p.token {
209 | 	case A_TIMES: // 需要重复
210 | 		p.expect(INT)
211 | 		repeat := utils.Int(p.id)
212 | 		size := p.size()
213 | 		p.define(id, utils.Int(p.id), p.size()) // size 代表数据类型 db dd 字符、字、双字
214 | 	case A_EQU: // equ 常量？伪指令，所有使用到该符号的，全部替换为值，不存在地址。
215 | 		// 这里需要被替换为值
216 | 		// 关于 equ 语法说明，equ 支持表达式：可以是数字、地址、其他符号、算术表达式等
217 | 		// equ 定义的符号在汇编时就被替换为具体值，不会占用内存，也不会生成机器码。
218 | 		// 不能对 equ 定义的符号赋新值（它不是变量）。
219 | 		// equ 只能用于常量表达式，不能用于运行时可变的值。
220 | 		// todo 完整的逻辑需要支持 数字，其他符号，表达式， 【最终获得运算后的值】
221 | 		// todo equ 引用其它符号，必须提前申明！
222 | 		p.require(NUMBER)                                 // todo 当前只支持数字
223 | 		p.ProcTable.AddLabel(id, NewLabelEqu(p.number())) // 直接添加符号
224 | 	case COLON: // 代码段（label）, main: 一般是函数名作为一个单独的记号
225 | 		p.ProcTable.AddLabel(id, NewLabelText()) // 作为一个段符号
226 | 	default: // 变量支持
227 | 		p.Lexer.Back(token) // db, dd, dw // 退回去重新读 p.size()
228 | 
229 | 		p.values(id, 1, p.size()) // 单个变量定义，直接解析
230 | 	}
231 | }
232 | 
233 | func (p *parser) dataList() (*ast.File, error) {
234 | 	p.next() // 跳过.data
235 | 	// 解析数据段内容
236 | 	for p.token > _literal {
237 | 		switch p.token {
238 | 		case ".byte", ".word", ".long", ".quad", ".float", ".double", A_STRING:
239 | 			// 解析数据定义伪指令
240 | 			decl := p.parseDataDirective()
241 | 			if decl != nil {
242 | 				p.declList = append(p.declList, decl)
243 | 			}
244 | 		case A_REPT:
245 | 			// 解析重复定义
246 | 			decl := p.parseReptDirective()
247 | 			if decl != nil {
248 | 				p.declList = append(p.declList, decl)
249 | 			}
250 | 		case IDENT:
251 | 			// 解析标签定义
252 | 			p.declList = append(p.declList, p.labelDec(p.id))
253 | 		case A_GLB: // 全局符号定义
254 | 			p.require(IDENT)
255 | 			// 添加到全局符号表
256 | 			p.ProcTable.AddLabel(p.id, NewLabelGlobal())
257 | 		default:
258 | 			p.errorf("unexpected token in data section: %s", p.token)
259 | 			return nil, p.error
260 | 		}
261 | 		p.next()
262 | 	}
263 | }
264 | 
// ParseFile parses the whole assembly source and returns a file node whose
// Decls is p.declList. If an error is already recorded in p.error it is
// returned immediately and nothing is parsed.
//
// The top-level loop runs while the current token is greater than _literal
// and dispatches on data sections, text sections, label or variable
// definitions, section switches, global symbol declarations and plain
// instructions. After the loop the last section is closed with _switch("").
//
// NOTE(review): the results of p.dataList() are discarded here — confirm that
// ignoring its error is intended.
func (p *parser) ParseFile() (*ast.File, error) {
	if p.error != nil {
		return nil, p.error
	}

	p.next()

	for p.token > _literal {
		switch p.token {
		case A_DATA: // data section definition
			p.dataList()
		case A_TEXT: // text (code) section definition
			p.next() // skip .text
			// Parse the contents of the text section.
			for p.token > _literal {
				switch p.token {
				case IDENT:
					p.declList = append(p.declList, p.labelDec(p.id))
				default:
					p.inst(p.token) // parse an instruction
				}
				p.next()
			}
		case IDENT: // two cases: a label definition or a variable definition
			p.declList = append(p.declList, p.labelDec(p.id))
		case A_SEC: // section definition
			p.require(IDENT)
			p._switch(p.id) // switch to the new section
		case A_GLB: // global symbol definition
			p.require(IDENT)
			// Add it to the global symbol table.
			p.ProcTable.AddLabel(p.id, NewLabelGlobal())
		default:
			p.inst(p.token) // parse an instruction
		}

		p.next()
	}

	p._switch("") // close the last section

	return &ast.File{
		Decls: p.declList,
	}, nil
}
310 | 
311 | // ExportLb 导出符号表
312 | //func (proc *parser) ExportLb() {
313 | //	for _, lb := range proc.MapLabel {
314 | //		if !lb.IsEqu { // EQU定义的符号不导出
315 | //			ObjFile.addSym(lb)
316 | //		}
317 | //	}
318 | //}
319 | 
320 | //func (proc *parser) WriteData(file *os.File) {
321 | //	for _, lb := range proc.DefLabelList {
322 | //		lb.write(file)
323 | //	}
324 | //}
325 | 
326 | //// Codegen 代码生成, 生成代码，同时记录每个段的大小
327 | //func (proc *parser) Codegen() error {
328 | //	// 源码扫描完成，开始生成代码， 内部符号已存在
329 | //	instrBuffer := bytes.NewBuffer(nil)
330 | //	for _, instr := range proc.InstrList {
331 | //		instr.WriteOut(instrBuffer, &proc.seg.Offset)
332 | //	}
333 | //	instrBuffer.Len() // 代码段大小
334 | //
335 | //	// important 符号表[可能]存在符号嵌套引用
336 | //	//    但是所有嵌套引用，被引用的符号必须被声明
337 | //	//        如果引用外部符号，记录地址， 下一个引用符合也只引用所在地址信息
338 | //	//    逻辑上无需处理嵌套
339 | //	for _, label := range proc.labelList {
340 | //
341 | //	}
342 | //	//for _, instr := range proc.InstrList {
343 | //	//	instr.WriteOut(instrBuffer, &proc.seg.Offset)
344 | //	//}
345 | //
346 | //}
347 | 
348 | //func Check(src, dest []byte, name string) {
349 | //	for i, ch := range src {
350 | //		if len(dest) <= i {
351 | //			fmt.Printf("错误：[0x%X],  两文件内容长度不一致: [%d, %d]", i, len(src), len(dest))
352 | //			return
353 | //		}
354 | //		if ch != dest[i] {
355 | //			fmt.Printf("错误:[0x%X, %d]（%X(%d) != %X(%d)）", i, i, ch, ch, dest[i], dest[i])
356 | //
357 | //			for i2, b := range src[i-4 : i+16] {
358 | //				fmt.Printf("%d: [%d, %d] \n", i+i2-4, b, dest[i+i2-4])
359 | //			}
360 | //			fmt.Printf("\n")
361 | //			return
362 | //		}
363 | //	}
364 | //
365 | //	fmt.Printf("校验完成，[%s]完全一致！\n", name)
366 | //}
367 | 
368 | /*
369 | * pseudo handles one pseudo-instruction line, selected by word. Examples of the GNU-style directives:
370 | .section .name
371 | .global main               # define a global symbol, visible to other files
372 | .local  local_func         # define a local symbol, visible only in the current file
373 | .type   main, @function    # set the symbol type; @function marks it as a function
374 | .size   main, .-main       # set the symbol size; .-main is the distance from the main label to here
375 | */
376 | func (p *Parser) pseudo(word string, args []LineToken) *Program {
377 | 	switch word {
378 | 	case ".section": // section switch
379 | 		// NOTE(review): this and the following dot-directive cases have empty bodies; nothing is done yet.
380 | 	case ".global":
381 | 
382 | 	case ".local":
383 | 
384 | 	case ".type":
385 | 
386 | 	case ".size":
387 | 
388 | 	case ".align":
389 | 
390 | 	case "DATA":
391 | 		p.asmData(operands) // NOTE(review): operands is not declared in this function (the parameter is args) — confirm
392 | 	case "FUNCDATA":
393 | 		p.asmFuncData(operands)
394 | 	case "GLOBL":
395 | 		p.asmGlobl(operands)
396 | 	case "PCDATA":
397 | 		p.asmPCData(operands)
398 | 	case "PCALIGN":
399 | 		p.asmPCAlign(operands)
400 | 	case "TEXT":
401 | 		p.asmText(operands) // function declaration
402 | 	default: // meant to handle symbol (label) declarations
403 | 		if len(args) > 0 && args[0].LiteralVal == ":" {
404 | 			// a leading ":" means word is a label; nothing is done with it yet
405 | 		}
406 | 		return false // NOTE(review): bool returned although the declared result type is *Program — confirm the signature
407 | 	}
408 | 	return true
409 | }
410 | 
411 | // asmText assembles a TEXT pseudo-op, which declares a function, for example:
412 | // TEXT runtime·sigtramp(SB),4,$0-0
413 | func (p *Parser) asmText(operands [][]lex.Token) { // records one function in the code section
414 | 	if len(operands) != 2 && len(operands) != 3 {  // only two or three comma-separated operands are accepted
415 | 		p.errorf("expect two or three operands for TEXT")
416 | 		return
417 | 	}
418 | 
419 | 	// Labels are function scoped. Patch existing labels and
420 | 	// create a new label space for this TEXT.
421 | 	p.patch()                             // todo: this ends up being called multiple times
422 | 	p.labels = make(map[string]*obj.Prog) // todo: re-created on every TEXT?
423 | 
424 | 	// Operand 0 is the symbol name in the form foo(SB).
425 | 	// That means symbol plus indirect on SB and no offset.
426 | 	nameAddr := p.address(operands[0]) // todo: computes the address?
427 | 	if !p.validSymbol("TEXT", &nameAddr, false) {
428 | 		return
429 | 	}
430 | 	name := symbolName(&nameAddr)
431 | 	next := 1
432 | 
433 | 	// Next operand is the optional text flag, a literal integer.
434 | 	var flag = int64(0)
435 | 	if len(operands) == 3 {
436 | 		flag = p.evalInteger("TEXT", operands[1])
437 | 		next++
438 | 	}
439 | 
440 | 	// Issue an error if we see a function defined as ABIInternal
441 | 	// without NOSPLIT. In ABIInternal, obj needs to know the function
442 | 	// signature in order to construct the morestack path, so this
443 | 	// currently isn't supported for asm functions.
444 | 	if nameAddr.Sym.ABI() == obj.ABIInternal && flag&obj.NOSPLIT == 0 {
445 | 		p.errorf("TEXT %q: ABIInternal requires NOSPLIT", name)
446 | 	}
447 | 
448 | 	// Next operand is the frame and arg size.
449 | 	// Bizarre syntax: $frameSize-argSize is two words, not subtraction.
450 | 	// Both frameSize and argSize must be simple integers; only frameSize
451 | 	// can be negative.
452 | 	// The "-argSize" may be missing; if so, set it to objabi.ArgsSizeUnknown.
453 | 	// Parse left to right.
454 | 	op := operands[next]
455 | 	if len(op) < 2 || op[0].ScanToken != '$' {
456 | 		p.errorf("TEXT %s: frame size must be an immediate constant", name)
457 | 		return
458 | 	}
459 | 	op = op[1:]
460 | 	negative := false
461 | 	if op[0].ScanToken == '-' {
462 | 		negative = true
463 | 		op = op[1:]
464 | 	}
465 | 	if len(op) == 0 || op[0].ScanToken != scanner.Int {
466 | 		p.errorf("TEXT %s: frame size must be an immediate constant", name)
467 | 		return
468 | 	}
469 | 	frameSize := p.positiveAtoi(op[0].String())
470 | 	if negative {
471 | 		frameSize = -frameSize
472 | 	}
473 | 	op = op[1:]
474 | 	argSize := int64(abi.ArgsSizeUnknown)
475 | 	if len(op) > 0 {
476 | 		// There is an argument size. It must be a minus sign followed by a non-negative integer literal.
477 | 		if len(op) != 2 || op[0].ScanToken != '-' || op[1].ScanToken != scanner.Int {
478 | 			p.errorf("TEXT %s: argument size must be of form -integer", name)
479 | 			return
480 | 		}
481 | 		argSize = p.positiveAtoi(op[1].String())
482 | 	}
483 | 	p.ctxt.InitTextSym(nameAddr.Sym, int(flag), p.pos())
484 | 	prog := &obj.Prog{
485 | 		Ctxt: p.ctxt,
486 | 		As:   obj.ATEXT,
487 | 		Pos:  p.pos(),
488 | 		From: nameAddr,
489 | 		To: obj.Addr{
490 | 			Type:   obj.TYPE_TEXTSIZE,
491 | 			Offset: frameSize,
492 | 			// Argsize set below.
493 | 		},
494 | 	}
495 | 	nameAddr.Sym.Func().Text = prog
496 | 	prog.To.Val = int32(argSize)
497 | 	p.append(prog, "", true) // todo: adds a code section?
498 | }
499 | 
500 | // asmData assembles a DATA pseudo-op, which stores one value of an explicit
501 | // size into a data symbol, e.g. DATA masks<>+0x00(SB)/4, $0x00000000
502 | func (p *Parser) asmData(operands [][]lex.Token) {
503 | 	if len(operands) != 2 {
504 | 		p.errorf("expect two operands for DATA")
505 | 		return
506 | 	}
507 | 
508 | 	// Operand 0 has the general form foo<>+0x04(SB)/4.
509 | 	op := operands[0]
510 | 	n := len(op)
511 | 	if n < 3 || op[n-2].ScanToken != '/' || op[n-1].ScanToken != scanner.Int {
512 | 		p.errorf("expect /size for DATA argument")
513 | 		return
514 | 	}
515 | 	szop := op[n-1].String()
516 | 	sz, err := strconv.Atoi(szop)
517 | 	if err != nil {
518 | 		p.errorf("bad size for DATA argument: %q", szop)
519 | 		return // sz is unusable; going on would record a bogus extent in p.dataAddr and write with size 0
520 | 	}
521 | 	op = op[:n-2]
522 | 	nameAddr := p.address(op)
523 | 	if !p.validSymbol("DATA", &nameAddr, true) {
524 | 		return
525 | 	}
526 | 	name := symbolName(&nameAddr)
527 | 
528 | 	// Operand 1 is an immediate constant or address.
529 | 	valueAddr := p.address(operands[1])
530 | 	switch valueAddr.Type {
531 | 	case obj.TYPE_CONST, obj.TYPE_FCONST, obj.TYPE_SCONST, obj.TYPE_ADDR: // OK
532 | 	default:
533 | 		p.errorf("DATA value must be an immediate constant or address")
534 | 		return
535 | 	}
536 | 
537 | 	// The addresses must not overlap. Easiest test: require monotonicity.
538 | 	if lastAddr, ok := p.dataAddr[name]; ok && nameAddr.Offset < lastAddr {
539 | 		p.errorf("overlapping DATA entry for %s", name)
540 | 		return
541 | 	}
542 | 	p.dataAddr[name] = nameAddr.Offset + int64(sz)
543 | 
544 | 	switch valueAddr.Type {
545 | 	case obj.TYPE_CONST:
546 | 		switch sz {
547 | 		case 1, 2, 4, 8:
548 | 			nameAddr.Sym.WriteInt(p.ctxt, nameAddr.Offset, int(sz), valueAddr.Offset)
549 | 		default:
550 | 			p.errorf("bad int size for DATA argument: %d", sz)
551 | 		}
552 | 	case obj.TYPE_FCONST:
553 | 		switch sz {
554 | 		case 4:
555 | 			nameAddr.Sym.WriteFloat32(p.ctxt, nameAddr.Offset, float32(valueAddr.Val.(float64)))
556 | 		case 8:
557 | 			nameAddr.Sym.WriteFloat64(p.ctxt, nameAddr.Offset, valueAddr.Val.(float64))
558 | 		default:
559 | 			p.errorf("bad float size for DATA argument: %d", sz)
560 | 		}
561 | 	case obj.TYPE_SCONST:
562 | 		nameAddr.Sym.WriteString(p.ctxt, nameAddr.Offset, int(sz), valueAddr.Val.(string))
563 | 	case obj.TYPE_ADDR:
564 | 		if sz == p.arch.PtrSize {
565 | 			nameAddr.Sym.WriteAddr(p.ctxt, nameAddr.Offset, int(sz), valueAddr.Sym, valueAddr.Offset)
566 | 		} else {
567 | 			p.errorf("bad addr size for DATA argument: %d", sz)
568 | 		}
569 | 	}
570 | }
571 | // pseudo is an unfinished stub on the lower-case parser; it recognises nothing yet.
572 | func (p *parser) pseudo() bool {
573 | 	return false // a function with a result must end in a return; the empty body did not compile
574 | }
575 | // Parse reads lines via p.line until it reports !ok, handling each as a pseudo-op or an instruction.
576 | func (p *Parser) Parse() *Program {
577 | 	scratch := make([][]lex.Token, 0, 3)
578 | 	for {
579 | 		word, cond, operands, ok := p.line(scratch) // operands reuses scratch: one outer entry per comma-separated operand, each holding that operand's tokens
580 | 		if !ok {
581 | 			break
582 | 		}
583 | 		scratch = operands
584 | 
585 | 		if p.pseudo(word, operands) { // pseudo-ops first: section and symbol definitions such as DATA and TEXT
586 | 			continue
587 | 		}
588 | 		i, present := p.arch.Instructions[word] // look up the instruction opcode for this word
589 | 		if present {
590 | 			p.instruction(i, word, cond, operands) // the main work: assemble the instruction
591 | 			continue
592 | 		}
593 | 		p.errorf("unrecognized instruction %q", word)
594 | 	}
595 | 	if p.errorCount > 0 {
596 | 		return nil, false // NOTE(review): two values returned but the signature declares a single *Program — confirm
597 | 	}
598 | 	p.patch() // todo: purpose unclear, possibly related to labels
599 | 	return p.firstProg, true
600 | }
601 | // NewParser builds a Parser reading from lex, with empty label and data-address maps and os.Stderr as error writer.
602 | func NewParser(lex *lexer) *Parser {
603 | 	return &Parser{
604 | 		lex:         lex,
605 | 		labels:      make(map[string]*obj.Prog),
606 | 		dataAddr:    make(map[string]int64),
607 | 		errorWriter: os.Stderr,
608 | 		allowABI:    ctxt != nil && objabi.LookupPkgSpecial(ctxt.Pkgpath).AllowAsmABI, // NOTE(review): ctxt is not declared in this function — confirm
609 | 		pkgPrefix:   pkgPrefix, // NOTE(review): the pkgPrefix value is not declared in this function — confirm
610 | 	}
611 | }
612 | 
613 | // parseDataDirective dispatches on the current token to the parser for that
614 | // data-definition directive and returns the resulting declaration. An unknown
615 | // directive is reported through p.errorf and yields nil.
616 | func (p *parser) parseDataDirective() *ast.GenDecl {
617 | 	switch p.token {
618 | 	case ".byte":
619 | 		return p.parseByteDirective()
620 | 	case ".word":
621 | 		return p.parseWordDirective()
622 | 	case ".long":
623 | 		return p.parseLongDirective()
624 | 	case ".quad": // listed once: a repeated constant case is rejected by the compiler
625 | 		return p.parseQuadDirective()
626 | 	case ".float", ".single":
627 | 		return p.parseQuadDirective() // NOTE(review): reuses the .quad parser — confirm this is intended
628 | 	case ".double":
629 | 		return p.parseQuadDirective() // NOTE(review): reuses the .quad parser — confirm this is intended
630 | 	case ".ascii":
631 | 		return p.parseAsciiDirective()
632 | 	case ".asciz":
633 | 		return p.parseAscizDirective()
634 | 	case ".string":
635 | 		return p.parseStringDirective()
636 | 	case ".rept":
637 | 		return p.parseReptDirective()
638 | 	default:
639 | 		p.errorf("unknown data directive: %s", p.token)
640 | 		return nil
641 | 	}
642 | }
643 | 
644 | // parseByteDirective parses a .byte directive: a comma-separated list whose
645 | // items are integers, string literals (expanded to one value per byte) or
646 | // identifiers. Every value becomes its own byte-typed spec in the returned
647 | // declaration; any other token reports an error and yields nil.
648 | func (p *parser) parseByteDirective() *ast.GenDecl {
649 | 	result := &ast.GenDecl{Tok: token.DATA}
650 | 	// addByte appends one byte-typed spec carrying the single value v.
651 | 	addByte := func(v ast.Expr) {
652 | 		result.Specs = append(result.Specs, &ast.ValueSpec{
653 | 			Type:   &ast.Ident{Name: "byte"},
654 | 			Values: []ast.Expr{v},
655 | 		})
656 | 	}
657 | 	// intLit renders n as a decimal integer literal.
658 | 	intLit := func(n int64) ast.Expr {
659 | 		return &ast.BasicLit{Kind: token.INT, Value: strconv.FormatInt(n, 10)}
660 | 	}
661 | 
662 | 	p.next() // move past the .byte keyword
663 | 	for {
664 | 		switch p.token {
665 | 		case INT:
666 | 			addByte(intLit(utils.Int(p.id)))
667 | 		case STRING:
668 | 			for _, b := range []byte(p.id) {
669 | 				addByte(intLit(int64(b)))
670 | 			}
671 | 		case IDENT: // symbol reference
672 | 			addByte(&ast.Ident{Name: p.id})
673 | 		default:
674 | 			p.errorf("invalid value in .byte directive")
675 | 			return nil
676 | 		}
677 | 
678 | 		// A comma after the value means another one follows.
679 | 		if p.next(); p.token != COMMA {
680 | 			return result
681 | 		}
682 | 		p.next()
683 | 	}
684 | }
698 | 
699 | // parseAsciiDirective parses a .ascii directive, which must be followed by a
700 | // string literal. Each byte of the string becomes its own byte-typed spec with
701 | // a decimal integer literal value, and the token after the string is made
702 | // current. Anything other than a string reports an error and yields nil.
703 | func (p *parser) parseAsciiDirective() *ast.GenDecl {
704 | 	// Step past the .ascii keyword; only a string literal may follow it.
705 | 	if p.next(); p.token != STRING {
706 | 		p.errorf("expected string literal after .ascii")
707 | 		return nil
708 | 	}
709 | 
710 | 	out := &ast.GenDecl{Tok: token.DATA}
711 | 	for i := 0; i < len(p.id); i++ {
712 | 		digits := strconv.FormatInt(int64(p.id[i]), 10)
713 | 		out.Specs = append(out.Specs, &ast.ValueSpec{
714 | 			Type:   &ast.Ident{Name: "byte"},
715 | 			Values: []ast.Expr{&ast.BasicLit{Kind: token.INT, Value: digits}},
716 | 		})
717 | 	}
718 | 
719 | 	p.next() // leave the token after the string current
720 | 	return out
721 | }
726 | 
727 | // parseAscizDirective 解析.asciz伪指令
728 | func (p *parser) parseAscizDirective() *ast.GenDecl {
729 | 	decl := p.parseAsciiDirective()
730 | 	if decl == nil {


--------------------------------------------------------------------------------
/compiler/compile/parser/parser_exp.go:
--------------------------------------------------------------------------------
  1 | package parser
  2 | 
  3 | import (
  4 | 	"github.com/facelang/face/compiler/compile/ast"
  5 | 	"github.com/facelang/face/compiler/compile/token"
  6 | )
  7 | 
  8 | // maxNestLev is the deepest we're willing to recurse during parsing
  9 | const maxNestLev int = 1e5
 10 | // incNestLev bumps the nesting depth, reports "exceeded max nesting depth" once
 11 | // it passes maxNestLev, and returns p so calls chain: decNestLev(incNestLev(p)).
 12 | func incNestLev(p *parser) *parser {
 13 | 	if p.nestLev++; p.nestLev > maxNestLev {
 14 | 		p.error(p.pos, "exceeded max nesting depth")
 15 | 	}
 16 | 	return p
 17 | }
 18 | 
 19 | // decNestLev undoes one incNestLev call by lowering the parser's nesting depth.
 20 | func decNestLev(p *parser) { p.nestLev-- }
 24 | 
 25 | // ----------------------------------------------------------------------------
 26 | // Common productions
 27 | 
 28 | // inRhs = true 代表右侧表达式，否则为左侧表达式
 29 | func exprList(p *parser, inRhs bool) []ast.Expr {
 30 | 	old := p.inRhs
 31 | 	p.inRhs = inRhs
 32 | 
 33 | 	list := []ast.Expr{expr(p)}
 34 | 	for p.token == token.COMMA {
 35 | 		p.next()
 36 | 		list = append(list, expr(p))
 37 | 	}
 38 | 
 39 | 	p.inRhs = old
 40 | 	return list
 41 | }
 42 | 
 43 | // ----------------------------------------------------------------------------
 44 | // Expressions
 45 | 
 46 | //func (p *parser) parseFuncTypeOrLit() ast.Expr {
 47 | //
 48 | //	typ := p.parseFuncType()
 49 | //	if p.token != tokens.LBRACE {
 50 | //		// function type only
 51 | //		return typ
 52 | //	}
 53 | //
 54 | //	p.exprLev++
 55 | //	body := p.parseBody()
 56 | //	p.exprLev--
 57 | //
 58 | //	return &ast.FuncLit{Type: typ, Body: body}
 59 | //}
 60 | 
 61 | // operand parses one operand: an identifier, a basic literal, a parenthesized
 62 | // expression, or a type (for a composite literal or a conversion). The result
 63 | // may be an expression or a raw type, so callers must verify it. If nothing
 64 | // matches, an error is reported and an *ast.BadExpr is returned.
 65 | func operand(p *parser) ast.Expr {
 66 | 	switch p.token {
 67 | 	case token.IDENT: // plain name
 68 | 		return p.name()
 69 | 
 70 | 	case token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING: // literal value
 71 | 		// NOTE(review): Pos is hard-coded to 0 rather than p.pos — confirm.
 72 | 		x := &ast.BasicLit{Pos: 0, Kind: p.token, Value: p.identifier}
 73 | 		p.next()
 74 | 		return x
 75 | 
 76 | 	case token.LPAREN: // (...) adds one level of grouping
 77 | 		lparen := p.pos
 78 | 		p.next()
 79 | 		x := exprRhs(p) // types may be parenthesized: (some type)
 80 | 		rparen := p.expect(token.RPAREN)
 81 | 		return &ast.ParenExpr{Lparen: lparen, X: x, Rparen: rparen}
 82 | 
 83 | 		// todo: func literals and func types (the FUNC token) are ignored for now.
 84 | 	}
 85 | 
 86 | 	// Everything above is a concrete value; what remains is a type or keyword,
 87 | 	// e.g. a conversion int(123) or a composite literal []string{"a", "b"}.
 88 | 	if typ := p.tryIdentOrType(); typ != nil { // do not consume trailing type parameters
 89 | 		// token.IDENT was handled by the switch above, so a bare name here
 90 | 		// would be an internal inconsistency and is the only case to report.
 91 | 		// The previous test was inverted (!isIdent) and therefore flagged
 92 | 		// every valid non-identifier type (arrays, maps, structs, ...).
 93 | 		if _, isIdent := typ.(*ast.Name); isIdent {
 94 | 			p.error(p.pos, "type cannot be identifier")
 95 | 		}
 96 | 		return typ
 97 | 	}
 98 | 
 99 | 	// we have an error
100 | 	pos := p.pos
101 | 	p.unexpect("operand")
102 | 	return &ast.BadExpr{From: pos, To: p.pos}
103 | }
104 | 
105 | // parseValue parses one element value: a nested brace-enclosed literal when the
106 | // current token is LBRACE, otherwise an ordinary expression. Called from parseElement.
107 | func (p *parser) parseValue() ast.Expr {
108 | 	if p.token != token.LBRACE {
109 | 		return expr(p)
110 | 	}
111 | 	return p.parseLiteralValue(nil)
112 | }
113 | 
114 | // parseElement parses one composite-literal element, either `value` or
115 | // `key: value`. Called from parseElementList.
116 | func (p *parser) parseElement() ast.Expr {
117 | 	first := p.parseValue()
118 | 	if p.token != token.COLON {
119 | 		return first
120 | 	}
121 | 	colonPos := p.pos
122 | 	p.next()
123 | 	return &ast.KeyValueExpr{Key: first, Colon: colonPos, Value: p.parseValue()}
124 | }
125 | // parseElementList parses comma-separated elements until RBRACE or EOF is the
126 | // current token, or until an element is not followed by a comma.
127 | func (p *parser) parseElementList() (list []ast.Expr) {
128 | 	for p.token != token.RBRACE && p.token != token.EOF {
129 | 		list = append(list, p.parseElement())
130 | 		if p.token == token.COMMA {
131 | 			p.next()
132 | 			continue
133 | 		}
134 | 		break
135 | 	}
136 | 	return list
137 | }
137 | 
138 | // 解析复合字面量， {1, 2, 3} {key: value} 类型
139 | func (p *parser) parseLiteralValue(typ ast.Expr) ast.Expr {
140 | 	defer decNestLev(incNestLev(p))
141 | 
142 | 	lbrace := p.expect(token.LBRACE)
143 | 	var elts []ast.Expr
144 | 	//p.exprLev++
145 | 	if p.token != token.RBRACE {
146 | 		elts = p.parseElementList()
147 | 	}
148 | 	//p.exprLev--
149 | 	rbrace := p.expect(token.RBRACE)
150 | 	return &ast.CompositeLit{Type: typ, Lbrace: lbrace, Elts: elts, Rbrace: rbrace}
151 | }
152 | 
153 | // packIndexExpr returns an IndexExpr x[expr0] when exprs has one element and an
154 | // IndexListExpr x[expr0, ...] when it has several. It panics on an empty slice.
155 | func packIndexExpr(x ast.Expr, lbrack token.Pos, exprs []ast.Expr, rbrack token.Pos) ast.Expr {
156 | 	if len(exprs) == 0 {
157 | 		panic("internal error: packIndexExpr with empty expr slice")
158 | 	}
159 | 	if len(exprs) == 1 {
160 | 		return &ast.IndexExpr{X: x, Lbrack: lbrack, Index: exprs[0], Rbrack: rbrack}
161 | 	}
162 | 	return &ast.IndexListExpr{X: x, Lbrack: lbrack, Indices: exprs, Rbrack: rbrack}
163 | }
174 | // parseIndexOrSliceOrInstance parses the bracketed suffix after x: an index x[i], a slice x[a:b(:c)], or an instance x[T, ...].
175 | func (p *parser) parseIndexOrSliceOrInstance(x ast.Expr) ast.Expr {
176 | 	lbrack := p.expect(token.LBRACK)
177 | 	if p.token == token.RBRACK { // empty brackets: report an error and return an IndexExpr holding a BadExpr
178 | 		p.unexpect("[operand is empty]")
179 | 		rbrack := p.pos
180 | 		p.next()
181 | 		return &ast.IndexExpr{
182 | 			X:      x,
183 | 			Lbrack: lbrack,
184 | 			Index:  &ast.BadExpr{From: rbrack, To: rbrack},
185 | 			Rbrack: rbrack,
186 | 		}
187 | 	}
188 | 	//p.exprLev++
189 | 
190 | 	const N = 3         // at most three indices: [index], [a:b], [a:b:c]
191 | 	var args []ast.Expr // entries of a comma-separated list, e.g. [1, 2, 3]
192 | 	var index [N]ast.Expr
193 | 	var colons [N - 1]token.Pos
194 | 	if p.token != token.COLON {
195 | 		index[0] = exprRhs(p)
196 | 	}
197 | 	ncolons := 0
198 | 	switch p.token {
199 | 	case token.COLON:
200 | 		// slice expression
201 | 		for p.token == token.COLON && ncolons < len(colons) {
202 | 			colons[ncolons] = p.pos
203 | 			ncolons++
204 | 			p.next()
205 | 			if p.token != token.COLON && p.token != token.RBRACK && p.token != token.EOF {
206 | 				index[ncolons] = exprRhs(p)
207 | 			}
208 | 		}
209 | 	case token.COMMA: // comma-separated list
210 | 		// instance expression
211 | 		args = append(args, index[0])
212 | 		for p.token == token.COMMA {
213 | 			p.next()
214 | 			if p.token != token.RBRACK && p.token != token.EOF {
215 | 				args = append(args, p.parseType())
216 | 			}
217 | 		}
218 | 	}
219 | 
220 | 	// p.exprLev--
221 | 	rbrack := p.expect(token.RBRACK)
222 | 
223 | 	if ncolons > 0 { // at least one colon was seen: this is a slice
224 | 		// slice expression
225 | 		slice3 := false
226 | 		if ncolons == 2 {
227 | 			slice3 = true
228 | 			// Check presence of middle and final index here rather than during type-checking
229 | 			// to prevent erroneous programs from passing through gofmt (was go.dev/issue/7305).
230 | 			if index[1] == nil {
231 | 				p.error(colons[0], "middle index required in 3-index slice")
232 | 				index[1] = &ast.BadExpr{From: colons[0] + 1, To: colons[1]}
233 | 			}
234 | 			if index[2] == nil {
235 | 				p.error(colons[1], "final index required in 3-index slice")
236 | 				index[2] = &ast.BadExpr{From: colons[1] + 1, To: rbrack}
237 | 			}
238 | 		}
239 | 		return &ast.SliceExpr{X: x, Lbrack: lbrack, Low: index[0], High: index[1], Max: index[2], Slice3: slice3, Rbrack: rbrack}
240 | 	}
241 | 
242 | 	if len(args) == 0 {
243 | 		// index expression
244 | 		return &ast.IndexExpr{X: x, Lbrack: lbrack, Index: index[0], Rbrack: rbrack}
245 | 	}
246 | 
247 | 	// instance expression
248 | 	return packIndexExpr(x, lbrack, args, rbrack)
249 | }
250 | 
251 | // funcCall parses the parenthesized argument list of a call to fun; a type conversion is parsed as a call too.
252 | func (p *parser) funcCall(fun ast.Expr) *ast.CallExpr {
253 | 	lparen := p.expect(token.LPAREN) // opening parenthesis
254 | 	//p.exprLev++
255 | 	var list []ast.Expr
256 | 	var ellipsis token.Pos
257 | 	for p.token != token.RPAREN && p.token != token.EOF && !ellipsis.IsValid() {
258 | 		list = append(list, exprRhs(p)) // builtins may expect a type: make(some type, ...)
259 | 		if p.token == token.ELLIPSIS {
260 | 			ellipsis = p.pos
261 | 			p.next()
262 | 		}
263 | 
264 | 		// a comma means another argument follows; anything else ends the list
265 | 		if p.token != token.COMMA {
266 | 			break
267 | 		}
268 | 		p.next()
269 | 	}
270 | 	//p.exprLev--
271 | 	rparen := p.expect(token.RPAREN) // closing parenthesis
272 | 
273 | 	return &ast.CallExpr{Fun: fun, Lparen: lparen, Args: list, Ellipsis: ellipsis, Rparen: rparen}
274 | }
275 | 
276 | // primaryExpr parses an operand (unless x is given) followed by any suffixes, e.g. x.name, x[123], x(...), x{...}.
277 | func primaryExpr(p *parser, x ast.Expr) ast.Expr {
278 | 	if x == nil {
279 | 		x = operand(p)
280 | 	}
281 | 
282 | 	var n int
283 | 	//defer func() { p.nestLev -= n }()
284 | 	for n = 1; ; n++ { // n counts iterations; its only consumer, the defer above, is commented out
285 | 		//incNestLev(p)
286 | 		switch p.token {
287 | 		case token.PERIOD: // x.name: only an identifier may follow the period
288 | 			p.next()
289 | 			x = &ast.SelectorExpr{X: x, Sel: p.name()}
290 | 		case token.LBRACK: // x[...], x[1], x[:]
291 | 			x = p.parseIndexOrSliceOrInstance(x) // todo
292 | 		case token.LPAREN: // x(...): function call or type conversion
293 | 			x = p.funcCall(x)
294 | 		case token.LBRACE: // todo: clarify what {} means in this position
295 | 			// operand may have returned a parenthesized complit
296 | 			// type; accept it but complain if we have a complit
297 | 			t := ast.Unparen(x) // strip enclosing parentheses to look at the underlying node
298 | 			// determine if '{' belongs to a composite literal or a block statement
299 | 			switch t.(type) { // node kinds not listed below return x as is; the listed ones go on to parse a literal body
300 | 			case *ast.BadExpr, *ast.Name, *ast.SelectorExpr: // conditionally parsed (the condition is commented out)
301 | 				//if p.exprLev < 0 { // some parsing paths set exprLev = -1
302 | 				//	return x
303 | 				//}
304 | 				// x is possibly a composite literal type
305 | 			case *ast.IndexExpr, *ast.IndexListExpr: // conditionally parsed (the condition is commented out)
306 | 				//if p.exprLev < 0 {
307 | 				//	return x
308 | 				//}
309 | 				// x is possibly a composite literal type
310 | 			case *ast.ArrayType, *ast.StructType, *ast.MapType:
311 | 				// x is a composite literal type
312 | 				// arrays, structs and maps are always parsed as literals
313 | 			default:
314 | 				return x
315 | 			}
316 | 			if t != x {
317 | 				p.error(t.Position(), "cannot parenthesize type in composite literal")
318 | 				// already progressed, no need to advance
319 | 			}
320 | 			x = p.parseLiteralValue(x) // todo: already implemented, may not be needed
321 | 		default:
322 | 			return x
323 | 		}
324 | 	}
325 | }
326 | 
327 | // unaryExpr parses a unary expression: a leading ADD, SUB, NOT, XOR, AND or TILDE
328 | // operator applies to a recursively parsed operand; anything else goes to primaryExpr.
329 | func unaryExpr(p *parser) ast.Expr {
330 | 	defer decNestLev(incNestLev(p))
331 | 	op := p.token
332 | 	switch op {
333 | 	case token.ADD, token.SUB, token.NOT, token.XOR, token.AND, token.TILDE: // unary operator, handled below
334 | 	default:
335 | 		return primaryExpr(p, nil) // no operator: fall through to the lower-level expression
336 | 	}
337 | 	opPos := p.pos
338 | 	p.next()
339 | 	return &ast.UnaryExpr{OpPos: opPos, Op: op, X: unaryExpr(p)}
340 | }
341 | 
342 | // precedence returns the current token and its precedence; inside a right-hand side, ASSIGN is reported as EQL.
343 | func precedence(p *parser) (token.Token, int) {
344 | 	op := p.token
345 | 	if op == token.ASSIGN && p.inRhs {
346 | 		op = token.EQL
347 | 	}
348 | 	return op, op.Precedence()
349 | }
350 | 
351 | // binaryExpr parses a binary expression whose operators have precedence of at least prec1, starting from x if given.
352 | func binaryExpr(p *parser, x ast.Expr, prec1 int) ast.Expr {
353 | 	if x == nil { // callers in this file pass nil, so the left operand is parsed here
354 | 		x = unaryExpr(p) // start with a unary expression
355 | 	}
356 | 
357 | 	var n int
358 | 	defer func() { p.nestLev -= n }()
359 | 	for n = 1; ; n++ {
360 | 		incNestLev(p)
361 | 		// compare the operator's precedence with the minimum required
362 | 		op, oprec := precedence(p)
363 | 		if oprec < prec1 { // the recursive call passes oprec+1, so an operator of equal precedence stops it
364 | 			return x
365 | 		}
366 | 		pos := p.expect(op)
367 | 		y := binaryExpr(p, nil, oprec+1) // precedence+1: the right operand stops at an operator of the same precedence
368 | 		x = &ast.BinaryExpr{X: x, OpPos: pos, Op: op, Y: y}
369 | 	}
370 | }
371 | // exprRhs parses an expression with p.inRhs forced to true and restores the previous value afterwards.
372 | func exprRhs(p *parser) ast.Expr {
373 | 	old := p.inRhs
374 | 	p.inRhs = true
375 | 	x := expr(p)
376 | 	p.inRhs = old
377 | 	return x
378 | }
379 | 
380 | // expr parses a full expression. The result may be a type or even a raw type ([...]int).
381 | // Call chain: expr() -> binaryExpr() -> unaryExpr() -> primaryExpr() -> operand()
382 | // Parsing descends from binary operators to unary operators, then suffix forms, then operands;
383 | // binary operators are additionally ordered by their precedence.
384 | func expr(p *parser) ast.Expr {
385 | 	return binaryExpr(p, nil, token.LowestPrec+1) // start just above the lowest precedence
386 | }
387 | // field is one parameter collected by the parameter-list parser: an optional name and an optional type.
388 | type field struct {
389 | 	name *ast.Name // parameter name; may be nil
390 | 	typ  ast.Expr  // parameter type; may be nil
391 | }
392 | // parseDotsType parses an ELLIPSIS token followed by an element type.
393 | func (p *parser) parseDotsType() *ast.Ellipsis {
394 | 	return &ast.Ellipsis{
395 | 		Ellipsis: p.expect(token.ELLIPSIS), // evaluated first: consumes the "..."
396 | 		Elt:      p.parseType(),
397 | 	}
398 | }
399 | 
400 | // parseParamDecl parses a single parameter; per the original note, name is usually nil and typeSetsOK usually false.
401 | func (p *parser) parseParamDecl(name *ast.Name, typeSetsOK bool) (f field) {
402 | 	// ptok remembers the real current token because p.token may be overwritten just below.
403 | 	ptok := p.token
404 | 	if name != nil { // a name was supplied by the caller: force the IDENT branch
405 | 		p.token = token.IDENT // force tokens.IDENT case in switch below
406 | 	} else if typeSetsOK && p.token == token.TILDE {
407 | 		// "~" ...
408 | 		return field{nil, p.embeddedElem(nil)}
409 | 	}
410 | 
411 | 	switch p.token { // dispatch on the (possibly forced) token kind
412 | 	case token.IDENT:
413 | 		// name
414 | 		if name != nil {
415 | 			f.name = name
416 | 			p.token = ptok // restore the saved token, then try to parse a type from it
417 | 		} else {
418 | 			f.name = p.name() // parse the parameter name
419 | 		}
420 | 		switch p.token { // look at what follows the name
421 | 		case token.IDENT, token.MUL, token.ARROW, token.FUNC, token.CHAN, token.MAP, token.STRUCT, token.INTERFACE, token.LPAREN:
422 | 			// name type
423 | 			f.typ = p.parseType() // parse the type that follows the name
424 | 
425 | 		case token.LBRACK: // "[": array type or type instance
426 | 			// name "[" type1, ..., typeN "]" or name "[" n "]" type
427 | 			f.name, f.typ = p.parseArrayFieldOrTypeInstance(f.name)
428 | 
429 | 		case token.ELLIPSIS: // "...": variadic parameter
430 | 			// name "..." type
431 | 			f.typ = p.parseDotsType()
432 | 			return // don't allow ...type "|" ...
433 | 
434 | 		case token.PERIOD: // ".": qualified name.xxx, treated as a type rather than a parameter name
435 | 			// name "." ...
436 | 			f.typ = p.parseQualifiedIdent(f.name)
437 | 			f.name = nil
438 | 
439 | 		case token.TILDE: // "~": type constraint
440 | 			if typeSetsOK {
441 | 				f.typ = p.embeddedElem(nil)
442 | 				return
443 | 			}
444 | 
445 | 		case token.OR: // "|": type constraint
446 | 			if typeSetsOK {
447 | 				// name "|" typeset
448 | 				f.typ = p.embeddedElem(f.name)
449 | 				f.name = nil
450 | 				return
451 | 			}
452 | 		}
453 | 
454 | 	case token.MUL, token.ARROW, token.FUNC, token.LBRACK, token.CHAN, token.MAP, token.STRUCT, token.INTERFACE, token.LPAREN:
455 | 		// type
456 | 		f.typ = p.parseType()
457 | 
458 | 	case token.ELLIPSIS:
459 | 		// "..." type
460 | 		// (always accepted)
461 | 		f.typ = p.parseDotsType()
462 | 		return // don't allow ...type "|" ...
463 | 
464 | 	default:
465 | 		// TODO(rfindley): this is incorrect in the case of type parameter lists
466 | 		//                 (should be "']'" in that case)
467 | 		p.unexpect("')'")
468 | 	}
469 | 
470 | 	// [name] type "|"
471 | 	if typeSetsOK && p.token == token.OR && f.typ != nil {
472 | 		f.typ = p.embeddedElem(f.typ)
473 | 	}
474 | 
475 | 	return
476 | }
477 | 
478 | // 多处调用， 默认调用 name0, type0 = nil ] or )
479 | // parseMethodSpec中 name0 != nil, typ0 = nil ]
480 | // parseGenericType中 name0, typ0 != nil ]
481 | func (p *parser) parseParameterList(name0 *ast.Name, typ0 ast.Expr, closing token.Token) (params []*ast.Field) {
482 | 	// Type parameters are the only parameter list closed by ']'.
483 | 	tparams := closing == token.RBRACK // 是否是泛型参数
484 | 
485 | 	pos0 := p.pos
486 | 	if name0 != nil {
487 | 		pos0 = name0.Position()
488 | 	} else if typ0 != nil {
489 | 		pos0 = typ0.Position()
490 | 	}
491 | 
492 | 	// Note: The code below matches the corresponding code in the syntax
493 | 	//       parser closely. Changes must be reflected in either parser.
494 | 	//       For the code to match, we use the local []field list that
495 | 	//       corresponds to []syntax.Field. At the end, the list must be
496 | 	//       converted into an []*ast.Field.
497 | 
498 | 	var list []field
499 | 	var named int // number of parameters that have an explicit name and type
500 | 	var typed int // number of parameters that have an explicit type
501 | 
502 | 	// todo 第一个参数不为空，或者不是结束符，则继续解析
503 | 	//       p.tok != closing, 就会一直循环
504 | 	for name0 != nil || p.token != closing && p.token != token.EOF {
505 | 		var par field
506 | 		if typ0 != nil { // todo 有泛型参数的情况
507 | 			if tparams {
508 | 				typ0 = p.embeddedElem(typ0)
509 | 			}
510 | 			par = field{name0, typ0}
511 | 		} else { // 主要解析过程， 解析单条参数
512 | 			par = p.parseParamDecl(name0, tparams) // name0 可能为空
513 | 		}
514 | 		name0 = nil                            // 1st name was consumed if present // 第一次使用后删除
515 | 		typ0 = nil                             // 1st typ was consumed if present // 第一次使用后删除
516 | 		if par.name != nil || par.typ != nil { // 解析到参数，添加到list， 并统计（参数数量和类型数量）
517 | 			list = append(list, par)
518 | 			if par.name != nil && par.typ != nil {
519 | 				named++
520 | 			}
521 | 			if par.typ != nil { // 参数名可以为空？
522 | 				typed++
523 | 			}
524 | 			// todo 实际解析， 单类型参数，会被解析为 par.name && par.typ = nil
525 | 		}
526 | 		if p.token != token.COMMA {
527 | 			break
528 | 		}
529 | 		p.next() // 取下一个符号，继续解析
530 | 	}
531 | 
532 | 	if len(list) == 0 {
533 | 		return // not uncommon
534 | 	}
535 | 
536 | 	// distribute parameter types (len(list) > 0)
537 | 	if named == 0 { // 处理未命名参数， 声明段，可以不命名参数
538 | 		// all unnamed => found names are type names
539 | 		for i := 0; i < len(list); i++ { // 类似 func(int, string) 这样的会被解析为 只有 name, 需要转为 仅 type
540 | 			par := &list[i]
541 | 			if typ := par.name; typ != nil {
542 | 				par.typ = typ
543 | 				par.name = nil
544 | 			}
545 | 		}
546 | 		if tparams { // 一般为 false, 处理单泛型类型（没有类型约束）Class[T, B, C]， 直接抛出异常？？？
547 | 			// This is the same error handling as below, adjusted for type parameters only.
548 | 			// See comment below for details. (go.dev/issue/64534)
549 | 			var errPos token.Pos
550 | 			var msg string
551 | 			if named == typed /* same as typed == 0 */ {
552 | 				errPos = p.pos // position error at closing ]
553 | 				msg = "missing type constraint"
554 | 			} else {
555 | 				errPos = pos0 // position at opening [ or first name
556 | 				msg = "missing type parameter name"
557 | 				if len(list) == 1 {
558 | 					msg += " or invalid array length"
559 | 				}
560 | 			}
561 | 			p.error(errPos, msg)
562 | 		}
563 | 	} else if named != len(list) { // 类似 ？？ func (a, b, c int)
564 | 		// some named or we're in a type parameter list => all must be named
565 | 		var errPos token.Pos                  // left-most error position (or invalid)
566 | 		var typ ast.Expr                      // current type (from right to left)
567 | 		for i := len(list) - 1; i >= 0; i-- { // 从右向左扫描参数列表
568 | 			if par := &list[i]; par.typ != nil { // par.typ != nil 记录类型，向前
569 | 				typ = par.typ
570 | 				if par.name == nil { // 参数名为空？
571 | 					errPos = typ.Position() // 记录一个异常
572 | 					n := &ast.Name{Pos: errPos, Name: "_"}
573 | 					par.name = n // 记录一个 _ 下划线变量
574 | 				}
575 | 			} else if typ != nil { // par.typ == nil && typ != nil
576 | 				par.typ = typ
577 | 			} else {
578 | 				// par.typ == nil && typ == nil => we only have a par.name
579 | 				errPos = par.name.Position()
580 | 				par.typ = &ast.BadExpr{From: errPos, To: p.pos}
581 | 			}
582 | 		}
583 | 		if errPos.IsValid() { // par.name == nil || typ == nil && par.typ == nil
584 | 			// Not all parameters are named because named != len(list).
585 | 			// If named == typed, there must be parameters that have no types.
586 | 			// They must be at the end of the parameter list, otherwise types
587 | 			// would have been filled in by the right-to-left sweep above and
588 | 			// there would be no error.
589 | 			// If tparams is set, the parameter list is a type parameter list.
590 | 			var msg string
591 | 			if named == typed {
592 | 				errPos = p.pos // position error at closing token ) or ]
593 | 				if tparams {
594 | 					msg = "missing type constraint"
595 | 				} else {
596 | 					msg = "missing parameter type"
597 | 				}
598 | 			} else {
599 | 				if tparams {
600 | 					msg = "missing type parameter name"
601 | 					// go.dev/issue/60812
602 | 					if len(list) == 1 {
603 | 						msg += " or invalid array length"
604 | 					}
605 | 				} else {
606 | 					msg = "missing parameter name"
607 | 				}
608 | 			}
609 | 			p.error(errPos, msg)
610 | 		}
611 | 	}
612 | 
613 | 	// Convert list to []*ast.Field.
614 | 	// If list contains types only, each type gets its own ast.Field.
615 | 	if named == 0 {
616 | 		// parameter list consists of types only
617 | 		for _, par := range list { // 再一次过滤空异常
618 | 			if par.typ == nil {
619 | 				p.error(p.pos, "nil type in unnamed parameter list")
620 | 			}
621 | 			params = append(params, &ast.Field{Type: par.typ})
622 | 		}
623 | 		return
624 | 	}
625 | 
626 | 	// If the parameter list consists of named parameters with types,
627 | 	// collect all names with the same types into a single ast.Field.
628 | 	var names []*ast.Name
629 | 	var typ ast.Expr
630 | 	addParams := func() {
631 | 		if typ == nil {
632 | 			p.error(p.pos, "nil type in unnamed parameter list")
633 | 		}
634 | 		field := &ast.Field{Names: names, Type: typ}
635 | 		params = append(params, field)
636 | 		names = nil
637 | 	}
638 | 	for _, par := range list {
639 | 		if par.typ != typ {
640 | 			// 将参数分组，相同类型的参数，添加到一个字段
641 | 			if len(names) > 0 { // 第一次为0
642 | 				addParams() // 添加一次， 清空一次 names
643 | 			}
644 | 			typ = par.typ // 记录
645 | 		}
646 | 		names = append(names, par.name)
647 | 	}
648 | 	// 最后调用一次，避免循环结束漏掉了
649 | 	if len(names) > 0 {
650 | 		addParams()
651 | 	}
652 | 	return
653 | }
654 | 
655 | // 可以解析参数以及泛型参数， 包括：接收者、参数、返回值
656 | // 解析参数时， acceptTParams=true 可以同时解析泛型参数
657 | func (p *parser) parseParameters(acceptTParams bool) (tparams, params *ast.FieldList) {
658 | 	// todo 可以同时解析泛型参数
659 | 	if acceptTParams && p.token == token.LBRACK {
660 | 		opening := p.pos
661 | 		p.next()
662 | 		// [T any](params) syntax
663 | 		list := p.parseParameterList(nil, nil, token.RBRACK)
664 | 		rbrack := p.expect(token.RBRACK) // ] 结束
665 | 		tparams = &ast.FieldList{Opening: opening, List: list, Closing: rbrack}
666 | 		// Type parameter lists must not be empty.
667 | 		if tparams.NumFields() == 0 {
668 | 			p.error(tparams.Closing, "empty type parameter list")
669 | 			tparams = nil // avoid follow-on errors
670 | 		}
671 | 	}
672 | 
673 | 	// 这里开始解析参数列表
674 | 	opening := p.expect(token.LPAREN) // ()
675 | 
676 | 	var fields []*ast.Field
677 | 	if p.token != token.RPAREN { // ）结束，判定
678 | 		fields = p.parseParameterList(nil, nil, token.RPAREN)
679 | 	}
680 | 
681 | 	rparen := p.expect(token.RPAREN) // ） 消耗掉结束符
682 | 	params = &ast.FieldList{Opening: opening, List: fields, Closing: rparen}
683 | 
684 | 	return // 返回两个参数，一个 tparams 一个 params
685 | }
686 | 
687 | func (p *parser) parseResult() *ast.FieldList {
688 | 	if p.token == token.LPAREN {
689 | 		_, results := p.parseParameters(false)
690 | 		return results
691 | 	}
692 | 
693 | 	typ := p.tryIdentOrType()
694 | 	if typ != nil {
695 | 		list := make([]*ast.Field, 1)
696 | 		list[0] = &ast.Field{Type: typ}
697 | 		return &ast.FieldList{List: list}
698 | 	}
699 | 
700 | 	return nil
701 | }
702 | 


--------------------------------------------------------------------------------
/docs/汇编语法详解.md:
--------------------------------------------------------------------------------
   1 | # 汇编语言详解
   2 | 
   3 | ## 目录
   4 | 1. [汇编语言概述](#汇编语言概述)
   5 | 2. [汇编语言风格](#汇编语言风格)
   6 | 3. [AT&T 汇编语法详解](#att-汇编语法详解)
   7 | 4. [Intel 汇编语法详解](#intel-汇编语法详解)
   8 | 5. [Plan 9 汇编语法详解](#plan-9-汇编语法详解)
   9 | 6. [三种汇编语法结构对比](#三种汇编语法结构对比)
  10 | 7. [寄存器详解](#寄存器详解)
  11 | 8. [指令集详解](#指令集详解)
  12 | 9. [内存寻址](#内存寻址)
  13 | 10. [程序结构](#程序结构)
  14 | 11. [系统调用](#系统调用)
  15 | 12. [实际应用示例](#实际应用示例)
  16 | 
  17 | ## 汇编语言概述
  18 | 
  19 | ### 什么是汇编语言
  20 | 汇编语言是一种低级编程语言，它与机器语言有着一一对应的关系。汇编语言使用助记符（mnemonics）来表示机器指令，使得程序更容易编写和理解。
  21 | 
  22 | ### 汇编语言的特点
  23 | - 直接操作硬件
  24 | - 执行效率高
  25 | - 代码体积小
  26 | - 可移植性差
  27 | - 开发效率低
  28 | 
  29 | ### 汇编语言的用途
  30 | - 操作系统开发
  31 | - 驱动程序开发
  32 | - 嵌入式系统
  33 | - 性能优化
  34 | - 逆向工程
  35 | 
  36 | ### 汇编语言风格
  37 | 由于历史原因和不同厂商的实现，形成了多种不同的语法风格。主要的汇编语言风格包括：
  38 | 
  39 | 1. AT&T 风格
  40 |    - 由 AT&T 贝尔实验室开发
  41 |    - 在 Unix/Linux 系统中广泛使用
  42 |    - 语法特点：源操作数在前，目标操作数在后
  43 |    - 寄存器名前加 `%`，立即数前加 `$`
  44 | 
  45 | 2. Intel 风格
  46 |    - 由 Intel 公司开发
  47 |    - 在 Windows 和 DOS 系统中广泛使用
  48 |    - 语法特点：目标操作数在前，源操作数在后
  49 |    - 直接使用寄存器名，不使用特殊前缀
  50 | 
  51 | 3. NASM 风格
  52 |    - 开源汇编器 NASM 使用的语法
  53 |    - 基于 Intel 风格但有所扩展
  54 |    - 支持更多现代特性
  55 |    - 跨平台兼容性好
  56 | 
  57 | 4. MASM 风格
  58 |    - Microsoft 宏汇编器使用的语法
  59 |    - 基于 Intel 风格
  60 |    - 支持丰富的宏和伪指令
  61 |    - 主要用于 Windows 平台
  62 | 
  63 | 5. GAS 风格
  64 |    - GNU 汇编器使用的语法
  65 |    - 基于 AT&T 风格
  66 |    - 在 Linux 系统中广泛使用
  67 |    - 支持多种架构
  68 | 
  69 | 6. Plan 9 风格
  70 |    - 源自贝尔实验室的 Plan 9 操作系统
  71 |    - 在 Go 语言中广泛使用
  72 |    - 语法特点：
  73 |       - 使用 `MOV` 等大写指令
  74 |       - 寄存器名不加 `%` 前缀，具体名称随架构而定（如 ARM64 的 `R0`, `R1`，AMD64 的 `AX`, `BX`）
  75 |       - 立即数前加 `$`
  76 |       - 内存引用使用 `(R0)` 形式
  77 |    - 优势：
  78 |       - 语法简洁统一
  79 |       - 跨平台支持好
  80 |       - 与 Go 语言工具链集成
  81 |       - 适合系统编程
  82 | 
  83 | ### 汇编语言风格对比
  84 | 
  85 | ```nasm
  86 | # AT&T 风格
  87 | movl    $42, %eax
  88 | addl    %ebx, %eax
  89 | movl    (%eax), %ebx
  90 | 
  91 | # Intel 风格
  92 | mov     eax, 42
  93 | add     eax, ebx
  94 | mov     ebx, [eax]
  95 | 
  96 | # NASM 风格
  97 | mov     eax, 42
  98 | add     eax, ebx
  99 | mov     ebx, [eax]
 100 | 
 101 | # MASM 风格
 102 | mov     eax, 42
 103 | add     eax, ebx
 104 | mov     ebx, [eax]
 105 | 
 106 | # Plan 9 风格
 107 | MOV     $42, R0
 108 | ADD     R1, R0
 109 | MOV     (R0), R1
 110 | ```
 111 | 
 112 | ### 最受欢迎的汇编语法风格
 113 | 
 114 | #### AT&T 语法
 115 | AT&T 语法是目前最受欢迎的汇编语法风格之一，主要原因包括：
 116 | 
 117 | 1. 开源社区支持
 118 |    - GCC/LLVM 编译器默认使用 AT&T 语法
 119 |    - 大多数开源工具链支持 AT&T 语法
 120 |    - 在 Linux 系统上工具链完善
 121 | 
 122 | 2. 跨平台性
 123 |    - 在 Unix/Linux 系统上统一使用
 124 |    - 支持多种 CPU 架构
 125 |    - 语法规则更加一致
 126 | 
 127 | 3. 教育领域
 128 |    - 大多数计算机体系结构课程使用 AT&T 语法
 129 |    - 教材和参考资料丰富
 130 |    - 学习资源更容易获取
 131 | 
 132 | 4. 工具支持
 133 |    - GDB 调试器默认使用 AT&T 语法
 134 |    - objdump 等工具支持 AT&T 语法
 135 |    - 开发工具链完善
 136 | 
 137 | #### Intel 语法
 138 | Intel 语法在特定领域仍然保持重要地位：
 139 | 
 140 | 1. Windows 平台
 141 |    - Visual Studio 支持 Intel 语法
 142 |    - Windows 驱动程序开发
 143 |    - 系统级编程
 144 | 
 145 | 2. 历史原因
 146 |    - 大量 legacy 代码使用 Intel 语法
 147 |    - 向后兼容性需求
 148 |    - 企业级应用支持
 149 | 
 150 | 3. 文档支持
 151 |    - Intel 官方文档使用 Intel 语法
 152 |    - 企业级应用文档
 153 |    - 驱动程序开发文档
 154 | 
 155 | 
 156 | ## AT&T 汇编语法详解
 157 | 
 158 | ### 基本指令
 159 | 
 160 | 汇编语言的基本指令用于执行各种操作，如数据传输、运算、控制流等。
 161 | 
 162 | 指令的基本格式：
 163 | ```
 164 | 操作码 源操作数, 目标操作数
 165 | ```
 166 | 
 167 | 格式说明：
 168 | - 操作码：指定要执行的操作
 169 | - 源操作数：提供操作的数据
 170 | - 目标操作数：存储操作结果
 171 | 
 172 | 示例：
 173 | ```nasm
 174 | movl    $42, %eax          # 立即数到寄存器
 175 | movl    %eax, %ebx         # 寄存器到寄存器
 176 | movl    (%eax), %ebx       # 内存到寄存器
 177 | movl    %eax, (%ebx)       # 寄存器到内存
 178 | ```
 179 | 
 180 | 注意：根据不同的CPU架构（x86、ARM、RISC-V等），指令的具体格式和操作数表示方式会有所不同。
 181 | 
 182 | ### 伪指令
 183 | AT&T 风格的伪指令主要用于定义段、符号和数据：
 184 | 
 185 | 1. 段定义伪指令
 186 | ```nasm
 187 | .section .text             # 代码段，用于存放指令机器码
 188 | .section .data             # 数据段，用于存放已初始化的数据
 189 | .section .bss              # 未初始化数据段，用于存放未初始化的数据
 190 | .section .rodata           # 只读数据段，用于存放常量数据
 191 | ```
 192 | 
 193 | 2. 符号定义伪指令
 194 | ```nasm
 195 | .global main               # 定义全局符号，使符号对其他文件可见
 196 | .local  local_func         # 定义局部符号，仅在当前文件可见
 197 | .type   main, @function    # 定义符号类型，@function表示这是一个函数
 198 | .size   main, .-main       # 定义符号大小，.-main表示从当前位置到main标签的距离
 199 | ```
 200 | 
 201 | 3. 数据定义伪指令
 202 | ```nasm
 203 | .byte   42                 # 定义8位数据（1字节）
 204 | .word   42                 # 定义16位数据（2字节）
 205 | .long   42                 # 定义32位数据（4字节）
 206 | .quad   42                 # 定义64位数据（8字节）
 207 | .ascii  "Hello"            # 定义ASCII字符串，不以null结尾
 208 | .asciz  "Hello"            # 定义以null结尾的ASCII字符串
 209 | .align  4                  # 4字节对齐，确保下一个数据从4字节边界开始
 210 | ```
 211 | 
 212 | 这些伪指令的正确使用对于生成正确的目标文件和调试信息非常重要。它们不仅影响程序的布局，还影响链接器如何处理符号，以及调试器如何显示程序信息。
 213 | 
 214 | ### 伪指令使用示例
 215 | 
 216 | 下面通过一个完整的示例程序来展示各种伪指令的实际使用场景：
 217 | 
 218 | ```nasm
 219 | # 文件信息
 220 | .file   "example.s"        # 指定源文件名，帮助调试器定位源代码
 221 | 
 222 | # 数据段定义
 223 | .section .data
 224 |     # 基本数据类型定义
 225 |     byte_val:   .byte   42          # 定义一个字节的变量
 226 |     word_val:   .word   0x1234      # 定义一个字的变量
 227 |     long_val:   .long   0x12345678  # 定义一个双字的变量
 228 |     quad_val:   .quad   0x1234567890ABCDEF  # 定义一个四字的变量
 229 | 
 230 |     # 字符串定义
 231 |     str1:       .ascii  "Hello"     # 普通ASCII字符串
 232 |     str2:       .asciz  "World"     # 以null结尾的字符串
 233 |     
 234 |     # 数组定义
 235 |     array:      .long   1, 2, 3, 4, 5  # 定义一个整数数组
 236 |     
 237 |     # 对齐示例
 238 |     .align 4                        # 确保下一个数据从4字节边界开始
 239 |     aligned_data: .long 0xFFFFFFFF  # 这个数据会被对齐到4字节边界
 240 | 
 241 | # 未初始化数据段
 242 | .section .bss
 243 |     buffer:     .space  1024        # 分配1KB的缓冲区
 244 |     .align 8                        # 8字节对齐
 245 |     aligned_buf: .space 64          # 分配64字节的对齐缓冲区
 246 | 
 247 | # 只读数据段
 248 | .section .rodata
 249 |     const_str:  .string "Constant"  # 定义只读字符串
 250 |     const_array: .long 1, 2, 3, 4   # 定义只读数组
 251 | 
 252 | # 代码段
 253 | .section .text
 254 |     # 全局函数定义
 255 |     .global main                    # 声明main为全局符号
 256 |     .type   main, @function         # 指定main为函数类型
 257 | main:
 258 |     pushl   %ebp                    # 保存旧的基址指针
 259 |     movl    %esp, %ebp             # 设置新的基址指针
 260 |     
 261 |     # 函数体
 262 |     movl    $0, %eax               # 返回值设为0
 263 |     
 264 |     movl    %ebp, %esp             # 恢复栈指针
 265 |     popl    %ebp                   # 恢复旧的基址指针
 266 |     ret                            # 返回
 267 |     .size   main, .-main           # 计算main函数的大小
 268 | 
 269 |     # 局部函数定义
 270 |     .local  helper_func            # 声明局部函数
 271 |     .type   helper_func, @function # 指定函数类型
 272 | helper_func:
 273 |     # 函数实现
 274 |     ret
 275 |     .size   helper_func, .-helper_func  # 计算函数大小
 276 | 
 277 | # 调试信息
 278 | .section .debug_info
 279 |     .long   0                      # 调试信息版本
 280 |     .string "example.s"            # 源文件名
 281 |     .long   1                      # 行号信息
 282 |     .long   10                     # 列号信息
 283 | 
 284 | # 编译器信息
 285 | .section .comment
 286 |     .string "GCC: (GNU) 9.3.0"     # 编译器版本信息
 287 | ```
 288 | 
 289 | 这个示例展示了各种伪指令的典型使用场景：
 290 | 
 291 | 1. 段定义和属性：
 292 |    - 使用`.section`定义不同的段
 293 |    - 使用`.align`控制数据对齐
 294 |    - 使用`.global`和`.local`控制符号可见性
 295 | 
 296 | 2. 数据定义：
 297 |    - 使用`.byte`、`.word`、`.long`、`.quad`定义不同大小的数据
 298 |    - 使用`.ascii`和`.asciz`定义字符串
 299 |    - 使用`.space`分配未初始化的空间
 300 | 
 301 | 3. 函数定义：
 302 |    - 使用`.type`指定函数类型
 303 |    - 使用`.size`计算函数大小
 304 |    - 使用`.global`和`.local`控制函数可见性
 305 | 
 306 | 4. 调试信息：
 307 |    - 使用`.file`指定源文件
 308 |    - 使用`.section .debug_info`添加调试信息
 309 |    - 使用`.section .comment`添加编译器信息
 310 | 
 311 | 这些伪指令的正确使用确保了：
 312 | - 数据正确对齐，提高访问效率
 313 | - 符号正确导出，支持链接
 314 | - 调试信息完整，便于调试
 315 | - 代码结构清晰，易于维护
 316 | 
 317 | ### 函数定义
 318 | ```nasm
 319 | .global main
 320 | main:
 321 |     pushl %ebp
 322 |     movl %esp, %ebp
 323 |     # 函数体
 324 |     movl %ebp, %esp
 325 |     popl %ebp
 326 |     ret
 327 | ```
 328 | 
 329 | ### 数据定义
 330 | ```nasm
 331 | .section .data
 332 |     msg:    .ascii "Hello"
 333 |     len:    .long 5
 334 |     array:  .long 1, 2, 3, 4, 5
 335 | ```
 336 | 
 337 | ### 注释风格
 338 | ```nasm
 339 | # 单行注释
 340 | /* 多行注释
 341 |    可以跨越多行 */
 342 | ```
 343 | 
 344 | ### 宏定义详解
 345 | AT&T 风格的宏定义支持参数化和条件编译：
 346 | 
 347 | 1. 基本宏定义
 348 | ```nasm
 349 | .macro push_reg reg
 350 |     pushl %\reg
 351 | .endm
 352 | 
 353 | .macro pop_reg reg
 354 |     popl %\reg
 355 | .endm
 356 | ```
 357 | 
 358 | 2. 带参数的宏
 359 | ```nasm
 360 | .macro mov_imm reg, imm
 361 |     movl $\imm, %\reg
 362 | .endm
 363 | 
 364 | .macro save_regs reg1, reg2, reg3
 365 |     pushl %\reg1
 366 |     pushl %\reg2
 367 |     pushl %\reg3
 368 | .endm
 369 | ```
 370 | 
 371 | 3. 条件宏
 372 | ```nasm
 373 | .macro debug_print msg
 374 | #ifdef DEBUG
 375 |     pushl %eax
 376 |     movl $\msg, %eax
 377 |     call print_debug
 378 |     popl %eax
 379 | #endif
 380 | .endm
 381 | ```
 382 | 
 383 | ### 条件编译
 384 | ```nasm
 385 | #ifdef DEBUG
 386 |     movl $1, %eax
 387 | #else
 388 |     movl $0, %eax
 389 | #endif
 390 | ```
 391 | 
 392 | 
 393 | ## Intel 汇编语法详解
 394 | 
 395 | ### 基本语法规则
 396 | ```nasm
 397 | ; 基本格式：操作码 目标操作数, 源操作数
 398 | mov     eax, 42            ; 立即数到寄存器
 399 | mov     ebx, eax           ; 寄存器到寄存器
 400 | mov     ebx, [eax]         ; 内存到寄存器
 401 | mov     [ebx], eax         ; 寄存器到内存
 402 | ```
 403 | 
 404 | ### 伪指令详解
 405 | Intel 风格的伪指令主要用于段定义和数据定义：
 406 | 
 407 | 1. 段定义伪指令
 408 | ```nasm
 409 | section .text              ; 代码段
 410 | section .data              ; 数据段
 411 | section .bss               ; 未初始化数据段
 412 | section .rdata             ; 只读数据段
 413 | ```
 414 | 
 415 | 2. 符号定义伪指令
 416 | ```nasm
 417 | global main                ; 定义全局符号
 418 | extern printf              ; 声明外部符号
 419 | public func                ; 声明公共符号
 420 | ```
 421 | 
 422 | 3. 数据定义伪指令
 423 | ```nasm
 424 | db 42                      ; 8位数据
 425 | dw 42                      ; 16位数据
 426 | dd 42                      ; 32位数据
 427 | dq 42                      ; 64位数据
 428 | db "Hello", 0              ; 以null结尾的字符串
 429 | times 10 db 0              ; 重复定义
 430 | align 4                    ; 4字节对齐
 431 | ```
 432 | 
 433 | ### 宏定义详解
 434 | Intel 风格的宏定义（以 MASM 为例）使用 MACRO 和 ENDM 关键字：
 435 | 
 436 | 1. 基本宏定义
 437 | ```nasm
 438 | push_reg MACRO reg
 439 |     push reg
 440 | ENDM
 441 | 
 442 | pop_reg MACRO reg
 443 |     pop reg
 444 | ENDM
 445 | ```
 446 | 
 447 | 2. 带参数的宏
 448 | ```nasm
 449 | mov_imm MACRO reg, imm
 450 |     mov reg, imm
 451 | ENDM
 452 | 
 453 | save_regs MACRO reg1, reg2, reg3
 454 |     push reg1
 455 |     push reg2
 456 |     push reg3
 457 | ENDM
 458 | ```
 459 | 
 460 | 3. 条件宏
 461 | ```nasm
 462 | debug_print MACRO msg
 463 | IFDEF DEBUG
 464 |     push eax
 465 |     mov eax, msg
 466 |     call print_debug
 467 |     pop eax
 468 | ENDIF
 469 | ENDM
 470 | ```
 471 | 
 472 | ### 条件编译
 473 | ```nasm
 474 | IFDEF DEBUG
 475 |     mov eax, 1
 476 | ELSE
 477 |     mov eax, 0
 478 | ENDIF
 479 | ```
 480 | 
 481 | ### 函数定义
 482 | ```nasm
 483 | global main
 484 | main:
 485 |     push ebp
 486 |     mov ebp, esp
 487 |     ; 函数体
 488 |     mov esp, ebp
 489 |     pop ebp
 490 |     ret
 491 | ```
 492 | 
 493 | ### 数据定义
 494 | ```nasm
 495 | section .data
 496 |     msg:    db "Hello"
 497 |     len:    dd 5
 498 |     array:  dd 1, 2, 3, 4, 5
 499 | ```
 500 | 
 501 | ### 注释风格
 502 | ```nasm
 503 | ; 单行注释
 504 | ; 多行注释
 505 | ; 每行都需要分号
 506 | ```
 507 | 
 508 | ## Plan 9 汇编语法详解
 509 | 
 510 | ### 基本语法规则
 511 | ```nasm
 512 | // 基本格式：操作码 源操作数, 目标操作数
 513 | MOV     $42, R0            // 立即数到寄存器
 514 | MOV     R0, R1             // 寄存器到寄存器
 515 | MOV     (R0), R1           // 内存到寄存器
 516 | MOV     R1, (R0)           // 寄存器到内存
 517 | ```
 518 | 
 519 | ### 伪指令详解
 520 | Plan 9 风格的伪指令主要用于函数和数据定义：
 521 | 
 522 | 1. 函数定义伪指令
 523 | ```nasm
 524 | TEXT ·main(SB), NOSPLIT, $0    # 函数定义
 525 | TEXT ·func(SB), $0-8           # 带栈帧大小的函数定义
 526 | ```
 527 | 
 528 | 2. 数据定义伪指令
 529 | ```nasm
 530 | DATA ·msg(SB)/8, $"Hello"      # 数据定义
 531 | GLOBL ·msg(SB), RODATA, $8     # 全局符号定义
 532 | ```
 533 | 
 534 | 3. 特殊伪指令
 535 | ```nasm
 536 | NOSPLIT                        # 表示函数不需要栈增长检查
 537 | RODATA                         # 只读数据段
 538 | NOPTR                          # 不包含指针的数据
 539 | ```
 540 | 
 541 | ### 函数定义详解
 542 | Plan 9 的函数定义格式非常特殊，需要详细解释：
 543 | 
 544 | 1. 基本格式
 545 | ```nasm
 546 | TEXT ·main(SB), NOSPLIT, $0
 547 | ```
 548 | - `TEXT`: 表示这是一个函数
 549 | - `·main`: 函数名，前导的中点 `·` 表示符号属于当前包（中点在符号名中代替包路径分隔符 `.`）
 550 | - `(SB)`: 静态基址（Static Base）伪寄存器，`·main(SB)` 表示以全局符号 `main` 的地址来引用该函数，并不表示"静态函数"
 551 | - `NOSPLIT`: 表示函数不需要栈增长检查
 552 | - `$0`: 栈帧大小，0表示不需要栈空间
 553 | 
 554 | 2. 带参数的函数
 555 | ```nasm
 556 | TEXT ·add(SB), NOSPLIT, $0-16
 557 | ```
 558 | - `$0-16`: 表示栈帧大小为0，参数总大小为16字节
 559 | 
 560 | 3. 带局部变量的函数
 561 | ```nasm
 562 | TEXT ·func(SB), $16-0
 563 | ```
 564 | - `$16`: 表示需要16字节的栈空间
 565 | - `-0`: 表示没有参数
 566 | 
 567 | 4. 完整的函数示例
 568 | ```nasm
 569 | TEXT ·main(SB), NOSPLIT, $0
 570 |     MOV R29, RSP            # 保存栈指针
 571 |     SUB $16, RSP            # 分配栈空间
 572 |     MOV R0, 8(RSP)          # 保存参数
 573 |     MOV R1, 16(RSP)         # 保存参数
 574 |     # 函数体
 575 |     ADD $16, RSP            # 恢复栈指针
 576 |     RET                     # 返回
 577 | ```
 578 | 
 579 | ### 宏定义详解
 580 | Plan 9 的宏定义使用 C 风格的预处理器指令：
 581 | 
 582 | 1. 基本宏定义
 583 | ```nasm
 584 | #define PUSH(reg) MOV reg, (SP)
 585 | #define POP(reg)  MOV (SP), reg
 586 | ```
 587 | 
 588 | 2. 带参数的宏
 589 | ```nasm
 590 | #define SAVE_REG(reg) MOV reg, -8(SP)
 591 | #define RESTORE_REG(reg) MOV -8(SP), reg
 592 | ```
 593 | 
 594 | 3. 条件宏
 595 | ```nasm
 596 | #ifdef DEBUG
 597 | #define DEBUG_PRINT(msg) MOV $msg, R0; CALL print_debug
 598 | #else
 599 | #define DEBUG_PRINT(msg)
 600 | #endif
 601 | ```
 602 | 
 603 | ## 三种汇编语法结构对比
 604 | 
 605 | ### 1. 指令格式
 606 | 
 607 | #### AT&T 风格
 608 | AT&T 风格的指令格式特点：
 609 | 1. 操作数顺序：源操作数在前，目标操作数在后
 610 | 2. 寄存器表示：寄存器名前加 `%` 符号
 611 | 3. 立即数表示：立即数前加 `$` 符号
 612 | 4. 内存引用：使用 `()` 表示内存地址
 613 | 5. 操作数大小：使用后缀表示操作数大小（b/w/l/q）
 614 | 
 615 | ```nasm
 616 | # 基本格式：操作码 源操作数, 目标操作数
 617 | movl    $42, %eax          # 立即数到寄存器
 618 | movl    %eax, %ebx         # 寄存器到寄存器
 619 | movl    (%eax), %ebx       # 内存到寄存器
 620 | movl    %eax, (%ebx)       # 寄存器到内存
 621 | 
 622 | # 操作数大小后缀
 623 | movb    $42, %al           # 8位操作
 624 | movw    $42, %ax           # 16位操作
 625 | movl    $42, %eax          # 32位操作
 626 | movq    $42, %rax          # 64位操作
 627 | 
 628 | # 复杂内存寻址
 629 | movl    (%eax,%ebx,4), %ecx    # 基址+变址*比例
 630 | movl    8(%eax), %ebx          # 带偏移的内存访问
 631 | ```
 632 | 
 633 | #### Intel 风格
 634 | Intel 风格的指令格式特点：
 635 | 1. 操作数顺序：目标操作数在前，源操作数在后
 636 | 2. 寄存器表示：直接使用寄存器名，不加前缀
 637 | 3. 立即数表示：直接使用数值，不加前缀
 638 | 4. 内存引用：使用 `[]` 表示内存地址
 639 | 5. 操作数大小：由寄存器或内存操作数类型决定
 640 | 
 641 | ```nasm
 642 | ; 基本格式：操作码 目标操作数, 源操作数
 643 | mov     eax, 42            ; 立即数到寄存器
 644 | mov     ebx, eax           ; 寄存器到寄存器
 645 | mov     ebx, [eax]         ; 内存到寄存器
 646 | mov     [ebx], eax         ; 寄存器到内存
 647 | 
 648 | ; 不同大小的操作
 649 | mov     al, 42             ; 8位操作
 650 | mov     ax, 42             ; 16位操作
 651 | mov     eax, 42            ; 32位操作
 652 | mov     rax, 42            ; 64位操作
 653 | 
 654 | ; 复杂内存寻址
 655 | mov     ecx, [eax+ebx*4]   ; 基址+变址*比例
 656 | mov     ebx, [eax+8]       ; 带偏移的内存访问
 657 | ```
 658 | 
 659 | #### Plan 9 风格
 660 | Plan 9 风格的指令格式特点：
 661 | 1. 操作数顺序：源操作数在前，目标操作数在后
 662 | 2. 寄存器表示：不加 `%` 前缀，名称随架构而定（如 ARM64 的 R0, R1；AMD64 的 AX, BX）
 663 | 3. 立即数表示：立即数前加 `$` 符号
 664 | 4. 内存引用：使用 `()` 表示内存地址
 665 | 5. 指令大写：所有指令都使用大写形式
 666 | 
 667 | ```nasm
 668 | # 基本格式：操作码 源操作数, 目标操作数
 669 | MOV     $42, R0            ; 立即数到寄存器
 670 | MOV     R0, R1             ; 寄存器到寄存器
 671 | MOV     (R0), R1           ; 内存到寄存器
 672 | MOV     R1, (R0)           ; 寄存器到内存
 673 | 
 674 | # 不同大小的操作
 675 | MOVB    $42, R0            ; 8位操作
 676 | MOVW    $42, R0            ; 16位操作
 677 | MOVL    $42, R0            ; 32位操作
 678 | MOVQ    $42, R0            ; 64位操作
 679 | 
 680 | # 复杂内存寻址
 681 | MOV     (R0)(R1*4), R2     ; 基址+变址*比例
 682 | MOV     8(R0), R1          ; 带偏移的内存访问
 683 | ```
 684 | 
 685 | ### 2. 伪指令
 686 | 
 687 | #### AT&T 风格
 688 | ```nasm
 689 | .section .text             # 代码段
 690 | .section .data             # 数据段
 691 | .global main               # 全局符号
 692 | .long 42                   # 32位整数
 693 | .ascii "Hello"             # ASCII字符串
 694 | .asciz "Hello"             # 以null结尾的字符串
 695 | ```
 696 | 
 697 | #### Intel 风格
 698 | ```nasm
 699 | section .text              ; 代码段
 700 | section .data              ; 数据段
 701 | global main                ; 全局符号
 702 | dd 42                      ; 32位整数
 703 | db "Hello"                 ; ASCII字符串
 704 | db "Hello", 0              ; 以null结尾的字符串
 705 | ```
 706 | 
 707 | #### Plan 9 风格
 708 | ```nasm
 709 | TEXT ·main(SB), NOSPLIT, $0    ; 函数定义
 710 | DATA ·msg(SB)/8, $"Hello"      ; 数据定义
 711 | GLOBL ·msg(SB), RODATA, $8     ; 全局符号
 712 | ```
 713 | 
 714 | ### 3. 宏定义
 715 | 
 716 | #### AT&T 风格
 717 | ```nasm
 718 | .macro push_reg reg
 719 |     pushl %\reg
 720 | .endm
 721 | 
 722 | .macro pop_reg reg
 723 |     popl %\reg
 724 | .endm
 725 | ```
 726 | 
 727 | #### Intel 风格
 728 | ```nasm
 729 | push_reg MACRO reg
 730 |     push reg
 731 | ENDM
 732 | 
 733 | pop_reg MACRO reg
 734 |     pop reg
 735 | ENDM
 736 | ```
 737 | 
 738 | #### Plan 9 风格
 739 | ```nasm
 740 | #define PUSH(reg) MOV reg, (SP)
 741 | #define POP(reg)  MOV (SP), reg
 742 | ```
 743 | 
 744 | ### 4. 条件编译
 745 | 
 746 | #### AT&T 风格
 747 | ```nasm
 748 | #ifdef DEBUG
 749 |     movl $1, %eax
 750 | #else
 751 |     movl $0, %eax
 752 | #endif
 753 | ```
 754 | 
 755 | #### Intel 风格
 756 | ```nasm
 757 | IFDEF DEBUG
 758 |     mov eax, 1
 759 | ELSE
 760 |     mov eax, 0
 761 | ENDIF
 762 | ```
 763 | 
 764 | #### Plan 9 风格
 765 | ```nasm
 766 | #ifdef DEBUG
 767 |     MOV $1, R0
 768 | #else
 769 |     MOV $0, R0
 770 | #endif
 771 | ```
 772 | 
 773 | ### 5. 函数定义
 774 | 
 775 | #### AT&T 风格
 776 | ```nasm
 777 | .global main
 778 | main:
 779 |     pushl %ebp
 780 |     movl %esp, %ebp
 781 |     # 函数体
 782 |     movl %ebp, %esp
 783 |     popl %ebp
 784 |     ret
 785 | ```
 786 | 
 787 | #### Intel 风格
 788 | ```nasm
 789 | global main
 790 | main:
 791 |     push ebp
 792 |     mov ebp, esp
 793 |     ; 函数体
 794 |     mov esp, ebp
 795 |     pop ebp
 796 |     ret
 797 | ```
 798 | 
 799 | #### Plan 9 风格
 800 | ```nasm
 801 | TEXT ·main(SB), NOSPLIT, $0
 802 |     MOV R29, RSP
 803 |     # 函数体
 804 |     RET
 805 | ```
 806 | 
 807 | ### 6. 数据定义
 808 | 
 809 | #### AT&T 风格
 810 | ```nasm
 811 | .section .data
 812 |     msg:    .ascii "Hello"
 813 |     len:    .long 5
 814 |     array:  .long 1, 2, 3, 4, 5
 815 | ```
 816 | 
 817 | #### Intel 风格
 818 | ```nasm
 819 | section .data
 820 |     msg:    db "Hello"
 821 |     len:    dd 5
 822 |     array:  dd 1, 2, 3, 4, 5
 823 | ```
 824 | 
 825 | #### Plan 9 风格
 826 | ```nasm
 827 | DATA ·msg(SB)/8, $"Hello"
 828 | DATA ·len(SB)/4, $5
 829 | DATA ·array(SB)/20, $1, $2, $3, $4, $5
 830 | ```
 831 | 
 832 | ### 7. 注释风格
 833 | 
 834 | #### AT&T 风格
 835 | ```nasm
 836 | # 单行注释
 837 | /* 多行注释
 838 |    可以跨越多行 */
 839 | ```
 840 | 
 841 | #### Intel 风格
 842 | ```nasm
 843 | ; 单行注释
 844 | ; 多行注释
 845 | ; 每行都需要分号
 846 | ```
 847 | 
 848 | #### Plan 9 风格
 849 | ```nasm
 850 | // 单行注释
 851 | /* 多行注释
 852 |    可以跨越多行 */
 853 | ```
 854 | 
 855 | ## 寄存器详解
 856 | 
 857 | ### 通用寄存器
 858 | - `eax`: 累加器
 859 | - `ebx`: 基址寄存器
 860 | - `ecx`: 计数器
 861 | - `edx`: 数据寄存器
 862 | 
 863 | ### 特殊寄存器
 864 | - `eip`: 指令指针
 865 | - `esp`: 栈指针
 866 | - `ebp`: 基址指针
 867 | - `eflags`: 标志寄存器
 868 | 
 869 | ### 标志位
 870 | - `CF`: 进位标志
 871 | - `ZF`: 零标志
 872 | - `SF`: 符号标志
 873 | - `OF`: 溢出标志
 874 | 
 875 | ## 指令集详解
 876 | 
 877 | ### 数据传输指令
 878 | ```nasm
 879 | mov     # 移动数据
 880 | push    # 压栈
 881 | pop     # 出栈
 882 | lea     # 加载有效地址
 883 | ```
 884 | 
 885 | ### 算术运算指令
 886 | ```nasm
 887 | add     # 加法
 888 | sub     # 减法
 889 | mul     # 乘法
 890 | div     # 除法
 891 | ```
 892 | 
 893 | ### 逻辑运算指令
 894 | ```nasm
 895 | and     # 与
 896 | or      # 或
 897 | xor     # 异或
 898 | not     # 非
 899 | ```
 900 | 
 901 | ### 移位指令
 902 | ```nasm
 903 | shl     # 左移
 904 | shr     # 逻辑右移
 905 | sar     # 算术右移
 906 | ```
 907 | 
 908 | ### 条件跳转指令
 909 | ```nasm
 910 | je      # 相等跳转
 911 | jne     # 不相等跳转
 912 | jg      # 大于跳转
 913 | jl      # 小于跳转
 914 | ```
 915 | 
 916 | ## 内存寻址
 917 | 
 918 | ### 寻址模式
 919 | 1. 立即寻址
 920 | 2. 寄存器寻址
 921 | 3. 直接寻址
 922 | 4. 寄存器间接寻址
 923 | 5. 基址寻址
 924 | 6. 变址寻址
 925 | 7. 基址变址寻址
 926 | 
 927 | ### 内存对齐
 928 | - 数据对齐原则
 929 | - 对齐指令
 930 | - 性能影响
 931 | 
 932 | ## 程序结构
 933 | 
 934 | ### 基本结构
 935 | ```nasm
 936 | .section .data
 937 |     # 数据段
 938 | 
 939 | .section .text
 940 |     .global main
 941 | main:
 942 |     # 代码段
 943 | ```
 944 | 
 945 | ### 函数调用
 946 | ```nasm
 947 | # 函数调用约定
 948 | push    %ebp
 949 | mov     %esp, %ebp
 950 | # 函数体
 951 | mov     %ebp, %esp
 952 | pop     %ebp
 953 | ret
 954 | ```
 955 | 
 956 | ### 栈操作
 957 | - 栈帧结构
 958 | - 参数传递
 959 | - 局部变量
 960 | 
 961 | ## 系统调用
 962 | 
 963 | ### Linux 系统调用
 964 | ```nasm
 965 | # 32位系统调用
 966 | mov     $1, %eax      # 系统调用号
 967 | mov     $1, %ebx      # 参数1
 968 | int     $0x80         # 触发系统调用
 969 | 
 970 | # 64位系统调用
 971 | mov     $1, %rax      # 系统调用号
 972 | mov     $1, %rdi      # 参数1
 973 | syscall               # 触发系统调用
 974 | ```
 975 | 
 976 | ### 常用系统调用
 977 | - 文件操作
 978 | - 进程控制
 979 | - 内存管理
 980 | - 网络通信
 981 | 
 982 | ## 实际应用示例
 983 | 
 984 | ### Hello World 程序
 985 | ```nasm
 986 | .section .data
 987 |     msg:    .ascii "Hello, World!\n"
 988 |     .equ    len, . - msg
 989 | 
 990 | .section .text
 991 |     .global main
 992 | main:
 993 |     # 写入系统调用
 994 |     mov     $4, %eax      # sys_write
 995 |     mov     $1, %ebx      # stdout
 996 |     mov     $msg, %ecx    # 消息地址
 997 |     mov     $len, %edx    # 消息长度
 998 |     int     $0x80
 999 | 
1000 |     # 退出系统调用
1001 |     mov     $1, %eax      # sys_exit
1002 |     xor     %ebx, %ebx    # 返回码 0
1003 |     int     $0x80
1004 | ```
1005 | 
1006 | ### 简单计算器
1007 | ```nasm
1008 | .section .data
1009 |     num1:   .long 10
1010 |     num2:   .long 20
1011 |     result: .long 0
1012 | 
1013 | .section .text
1014 |     .global main
1015 | main:
1016 |     mov     num1, %eax
1017 |     add     num2, %eax
1018 |     mov     %eax, result
1019 |     ret
1020 | ```
1021 | 
1022 | ## 调试技巧
1023 | 
1024 | ### 常用调试工具
1025 | - GDB
1026 | - objdump
1027 | - strace
1028 | - ltrace
1029 | 
1030 | ### 调试方法
1031 | - 断点设置
1032 | - 单步执行
1033 | - 寄存器查看
1034 | - 内存查看
1035 | 
1036 | ## 性能优化
1037 | 
1038 | ### 优化技巧
1039 | - 指令选择
1040 | - 寄存器使用
1041 | - 内存访问
1042 | - 分支预测
1043 | 
1044 | ### 常见陷阱
1045 | - 指令延迟
1046 | - 缓存效应
1047 | - 流水线停顿
1048 | - 分支预测失败
1049 | 
1050 | ## 总结
1051 | 
1052 | 汇编语言虽然复杂，但掌握它对于理解计算机底层原理和进行系统级编程非常重要。通过本文的学习，您应该能够：
1053 | 1. 理解汇编语言的基本概念
1054 | 2. 掌握 AT&T、Intel 和 Plan 9 三种语法
1055 | 3. 编写简单的汇编程序
1056 | 4. 进行基本的调试和优化
1057 | 
1058 | ## 参考资料
1059 | 1. Intel 64 and IA-32 Architectures Software Developer's Manual
1060 | 2. AT&T Assembly Language Reference
1061 | 3. Linux System Call Table
1062 | 4. x86 Assembly Language Reference Manual 


--------------------------------------------------------------------------------
/compiler/compile/token/file.go:
--------------------------------------------------------------------------------
  1 | package token
  2 | 
  3 | import (
  4 | 	"cmp"
  5 | 	"fmt"
  6 | 	"slices"
  7 | 	"strconv"
  8 | 	"sync"
  9 | 	"sync/atomic"
 10 | )
 11 | 
 12 | // -----------------------------------------------------------------------------
 13 | // Positions
 14 | 
 15 | // Position describes an arbitrary source position
 16 | // including the file, line, and column location.
 17 | // A Position is valid if the line number is > 0.
 18 | type Position struct {
 19 | 	Filename string // filename, if any
 20 | 	Offset   int    // offset, starting at 0
 21 | 	Line     int    // line number, starting at 1
 22 | 	Column   int    // column number, starting at 1 (byte count)
 23 | }
 24 | 
 25 | // IsValid reports whether the position is valid.
 26 | func (pos *Position) IsValid() bool { return pos.Line > 0 }
 27 | 
 28 | // String returns a string in one of several forms:
 29 | //
 30 | //	file:line:column    valid position with file name
 31 | //	file:line           valid position with file name but no column (column == 0)
 32 | //	line:column         valid position without file name
 33 | //	line                valid position without file name and no column (column == 0)
 34 | //	file                invalid position with file name
 35 | //	-                   invalid position without file name
 36 | func (pos Position) String() string {
 37 | 	s := pos.Filename
 38 | 	if pos.IsValid() {
 39 | 		if s != "" {
 40 | 			s += ":"
 41 | 		}
 42 | 		s += strconv.Itoa(pos.Line)
 43 | 		if pos.Column != 0 {
 44 | 			s += fmt.Sprintf(":%d", pos.Column)
 45 | 		}
 46 | 	}
 47 | 	if s == "" {
 48 | 		s = "-"
 49 | 	}
 50 | 	return s
 51 | }
 52 | 
 53 | // Pos is a compact encoding of a source position within a file set.
 54 | // It can be converted into a [Position] for a more convenient, but much
 55 | // larger, representation.
 56 | //
 57 | // The Pos value for a given file is a number in the range [base, base+size],
 58 | // where base and size are specified when a file is added to the file set.
 59 | // The difference between a Pos value and the corresponding file base
 60 | // corresponds to the byte offset of that position (represented by the Pos value)
 61 | // from the beginning of the file. Thus, the file base offset is the Pos value
 62 | // representing the first byte in the file.
 63 | //
 64 | // To create the Pos value for a specific source offset (measured in bytes),
 65 | // first add the respective file to the current file set using [FileSet.AddFile]
 66 | // and then call [File.Pos](offset) for that file. Given a Pos value p
 67 | // for a specific file set fset, the corresponding [Position] value is
 68 | // obtained by calling fset.Position(p).
 69 | //
 70 | // Pos values can be compared directly with the usual comparison operators:
 71 | // If two Pos values p and q are in the same file, comparing p and q is
 72 | // equivalent to comparing the respective source file offsets. If p and q
 73 | // are in different files, p < q is true if the file implied by p was added
 74 | // to the respective file set before the file implied by q.
 75 | type Pos int
 76 | 
 77 | // The zero value for [Pos] is NoPos; there is no file and line information
 78 | // associated with it, and NoPos.IsValid() is false. NoPos is always
 79 | // smaller than any other [Pos] value. The corresponding [Position] value
 80 | // for NoPos is the zero value for [Position].
 81 | const NoPos Pos = 0
 82 | 
 83 | // IsValid reports whether the position is valid.
 84 | func (p Pos) IsValid() bool {
 85 | 	return p != NoPos
 86 | }
 87 | 
 88 | // -----------------------------------------------------------------------------
 89 | // File
 90 | 
 91 | // A File is a handle for a file belonging to a [FileSet].
 92 | // A File has a name, size, and line offset table.
 93 | type File struct {
 94 | 	name string // file name as provided to AddFile
 95 | 	base int    // Pos value range for this file is [base...base+size]
 96 | 	size int    // file size as provided to AddFile
 97 | 
 98 | 	// lines and infos are protected by mutex
 99 | 	mutex sync.Mutex
100 | 	lines []int // lines contains the offset of the first character for each line (the first entry is always 0)
101 | 	infos []lineInfo
102 | }
103 | 
104 | // Name returns the file name of file f as registered with AddFile.
105 | func (f *File) Name() string {
106 | 	return f.name
107 | }
108 | 
109 | // Base returns the base offset of file f as registered with AddFile.
110 | func (f *File) Base() int {
111 | 	return f.base
112 | }
113 | 
114 | // Size returns the size of file f as registered with AddFile.
115 | func (f *File) Size() int {
116 | 	return f.size
117 | }
118 | 
119 | // LineCount returns the number of lines in file f.
120 | func (f *File) LineCount() int {
121 | 	f.mutex.Lock()
122 | 	n := len(f.lines)
123 | 	f.mutex.Unlock()
124 | 	return n
125 | }
126 | 
127 | // AddLine adds the line offset for a new line.
128 | // The line offset must be larger than the offset for the previous line
129 | // and smaller than the file size; otherwise the line offset is ignored.
130 | func (f *File) AddLine(offset int) {
131 | 	f.mutex.Lock()
132 | 	if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size {
133 | 		f.lines = append(f.lines, offset)
134 | 	}
135 | 	f.mutex.Unlock()
136 | }
137 | 
138 | // MergeLine merges a line with the following line. It is akin to replacing
139 | // the newline character at the end of the line with a space (to not change the
140 | // remaining offsets). To obtain the line number, consult e.g. [Position.Line].
141 | // MergeLine will panic if given an invalid line number.
142 | func (f *File) MergeLine(line int) {
143 | 	if line < 1 {
144 | 		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
145 | 	}
146 | 	f.mutex.Lock()
147 | 	defer f.mutex.Unlock()
148 | 	if line >= len(f.lines) {
149 | 		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
150 | 	}
151 | 	// To merge the line numbered <line> with the line numbered <line+1>,
152 | 	// we need to remove the entry in lines corresponding to the line
153 | 	// numbered <line+1>. The entry in lines corresponding to the line
154 | 	// numbered <line+1> is located at index <line>, since indices in lines
155 | 	// are 0-based and line numbers are 1-based.
156 | 	copy(f.lines[line:], f.lines[line+1:])
157 | 	f.lines = f.lines[:len(f.lines)-1]
158 | }
159 | 
160 | // Lines returns the effective line offset table of the form described by [File.SetLines].
161 | // Callers must not mutate the result.
162 | func (f *File) Lines() []int {
163 | 	f.mutex.Lock()
164 | 	lines := f.lines
165 | 	f.mutex.Unlock()
166 | 	return lines
167 | }
168 | 
169 | // SetLines sets the line offsets for a file and reports whether it succeeded.
170 | // The line offsets are the offsets of the first character of each line;
171 | // for instance for the content "ab\nc\n" the line offsets are {0, 3}.
172 | // An empty file has an empty line offset table.
173 | // Each line offset must be larger than the offset for the previous line
174 | // and smaller than the file size; otherwise SetLines fails and returns
175 | // false.
176 | // Callers must not mutate the provided slice after SetLines returns.
177 | func (f *File) SetLines(lines []int) bool {
178 | 	// verify validity of lines table
179 | 	size := f.size
180 | 	for i, offset := range lines {
181 | 		if i > 0 && offset <= lines[i-1] || size <= offset {
182 | 			return false
183 | 		}
184 | 	}
185 | 
186 | 	// set lines table
187 | 	f.mutex.Lock()
188 | 	f.lines = lines
189 | 	f.mutex.Unlock()
190 | 	return true
191 | }
192 | 
193 | // SetLinesForContent sets the line offsets for the given file content.
194 | // It ignores position-altering //line comments.
195 | func (f *File) SetLinesForContent(content []byte) {
196 | 	var lines []int
197 | 	line := 0
198 | 	for offset, b := range content {
199 | 		if line >= 0 {
200 | 			lines = append(lines, line)
201 | 		}
202 | 		line = -1
203 | 		if b == '\n' {
204 | 			line = offset + 1
205 | 		}
206 | 	}
207 | 
208 | 	// set lines table
209 | 	f.mutex.Lock()
210 | 	f.lines = lines
211 | 	f.mutex.Unlock()
212 | }
213 | 
214 | // LineStart returns the [Pos] value of the start of the specified line.
215 | // It ignores any alternative positions set using [File.AddLineColumnInfo].
216 | // LineStart panics if the 1-based line number is invalid.
217 | func (f *File) LineStart(line int) Pos {
218 | 	if line < 1 {
219 | 		panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line))
220 | 	}
221 | 	f.mutex.Lock()
222 | 	defer f.mutex.Unlock()
223 | 	if line > len(f.lines) {
224 | 		panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines)))
225 | 	}
226 | 	return Pos(f.base + f.lines[line-1])
227 | }
228 | 
// lineInfo holds the alternative file, line, and column number
// information (as supplied, for example, by a //line directive) that
// applies starting at a particular file offset.
type lineInfo struct {
	// The fields are exported so that gob can access them.
	Offset       int
	Filename     string
	Line, Column int
}
238 | 
239 | // AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument.
240 | // It is here for backward-compatibility for code prior to Go 1.11.
241 | func (f *File) AddLineInfo(offset int, filename string, line int) {
242 | 	f.AddLineColumnInfo(offset, filename, line, 1)
243 | }
244 | 
245 | // AddLineColumnInfo adds alternative file, line, and column number
246 | // information for a given file offset. The offset must be larger
247 | // than the offset for the previously added alternative line info
248 | // and smaller than the file size; otherwise the information is
249 | // ignored.
250 | //
251 | // AddLineColumnInfo is typically used to register alternative position
252 | // information for line directives such as //line filename:line:column.
253 | func (f *File) AddLineColumnInfo(offset int, filename string, line, column int) {
254 | 	f.mutex.Lock()
255 | 	if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size {
256 | 		f.infos = append(f.infos, lineInfo{offset, filename, line, column})
257 | 	}
258 | 	f.mutex.Unlock()
259 | }
260 | 
261 | // fixOffset fixes an out-of-bounds offset such that 0 <= offset <= f.size.
262 | func (f *File) fixOffset(offset int) int {
263 | 	switch {
264 | 	case offset < 0:
265 | 		if !debug {
266 | 			return 0
267 | 		}
268 | 	case offset > f.size:
269 | 		if !debug {
270 | 			return f.size
271 | 		}
272 | 	default:
273 | 		return offset
274 | 	}
275 | 
276 | 	// only generate this code if needed
277 | 	if debug {
278 | 		panic(fmt.Sprintf("offset %d out of bounds [%d, %d] (position %d out of bounds [%d, %d])",
279 | 			0 /* for symmetry */, offset, f.size,
280 | 			f.base+offset, f.base, f.base+f.size))
281 | 	}
282 | 	return 0
283 | }
284 | 
285 | // Pos returns the Pos value for the given file offset.
286 | //
287 | // If offset is negative, the result is the file's start
288 | // position; if the offset is too large, the result is
289 | // the file's end position (see also go.dev/issue/57490).
290 | //
291 | // The following invariant, though not true for Pos values
292 | // in general, holds for the result p:
293 | // f.Pos(f.Offset(p)) == p.
294 | func (f *File) Pos(offset int) Pos {
295 | 	return Pos(f.base + f.fixOffset(offset))
296 | }
297 | 
298 | // Offset returns the offset for the given file position p.
299 | //
300 | // If p is before the file's start position (or if p is NoPos),
301 | // the result is 0; if p is past the file's end position,
302 | // the result is the file size (see also go.dev/issue/57490).
303 | //
304 | // The following invariant, though not true for offset values
305 | // in general, holds for the result offset:
306 | // f.Offset(f.Pos(offset)) == offset
307 | func (f *File) Offset(p Pos) int {
308 | 	return f.fixOffset(int(p) - f.base)
309 | }
310 | 
311 | // Line returns the line number for the given file position p;
312 | // p must be a [Pos] value in that file or [NoPos].
313 | func (f *File) Line(p Pos) int {
314 | 	return f.Position(p).Line
315 | }
316 | 
317 | func searchLineInfos(a []lineInfo, x int) int {
318 | 	i, found := slices.BinarySearchFunc(a, x, func(a lineInfo, x int) int {
319 | 		return cmp.Compare(a.Offset, x)
320 | 	})
321 | 	if !found {
322 | 		// We want the lineInfo containing x, but if we didn't
323 | 		// find x then i is the next one.
324 | 		i--
325 | 	}
326 | 	return i
327 | }
328 | 
329 | // unpack returns the filename and line and column number for a file offset.
330 | // If adjusted is set, unpack will return the filename and line information
331 | // possibly adjusted by //line comments; otherwise those comments are ignored.
332 | func (f *File) unpack(offset int, adjusted bool) (filename string, line, column int) {
333 | 	f.mutex.Lock()
334 | 	filename = f.name
335 | 	if i := searchInts(f.lines, offset); i >= 0 {
336 | 		line, column = i+1, offset-f.lines[i]+1
337 | 	}
338 | 	if adjusted && len(f.infos) > 0 {
339 | 		// few files have extra line infos
340 | 		if i := searchLineInfos(f.infos, offset); i >= 0 {
341 | 			alt := &f.infos[i]
342 | 			filename = alt.Filename
343 | 			if i := searchInts(f.lines, alt.Offset); i >= 0 {
344 | 				// i+1 is the line at which the alternative position was recorded
345 | 				d := line - (i + 1) // line distance from alternative position base
346 | 				line = alt.Line + d
347 | 				if alt.Column == 0 {
348 | 					// alternative column is unknown => relative column is unknown
349 | 					// (the current specification for line directives requires
350 | 					// this to apply until the next PosBase/line directive,
351 | 					// not just until the new newline)
352 | 					column = 0
353 | 				} else if d == 0 {
354 | 					// the alternative position base is on the current line
355 | 					// => column is relative to alternative column
356 | 					column = alt.Column + (offset - alt.Offset)
357 | 				}
358 | 			}
359 | 		}
360 | 	}
361 | 	// TODO(mvdan): move Unlock back under Lock with a defer statement once
362 | 	// https://go.dev/issue/38471 is fixed to remove the performance penalty.
363 | 	f.mutex.Unlock()
364 | 	return
365 | }
366 | 
367 | func (f *File) position(p Pos, adjusted bool) (pos Position) {
368 | 	offset := f.fixOffset(int(p) - f.base)
369 | 	pos.Offset = offset
370 | 	pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted)
371 | 	return
372 | }
373 | 
374 | // PositionFor returns the Position value for the given file position p.
375 | // If p is out of bounds, it is adjusted to match the File.Offset behavior.
376 | // If adjusted is set, the position may be adjusted by position-altering
377 | // //line comments; otherwise those comments are ignored.
378 | // p must be a Pos value in f or NoPos.
379 | func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) {
380 | 	if p != NoPos {
381 | 		pos = f.position(p, adjusted)
382 | 	}
383 | 	return
384 | }
385 | 
386 | // Position returns the Position value for the given file position p.
387 | // If p is out of bounds, it is adjusted to match the File.Offset behavior.
388 | // Calling f.Position(p) is equivalent to calling f.PositionFor(p, true).
389 | func (f *File) Position(p Pos) (pos Position) {
390 | 	return f.PositionFor(p, true)
391 | }
392 | 
393 | // -----------------------------------------------------------------------------
394 | // FileSet
395 | 
396 | // A FileSet represents a set of source files.
397 | // Methods of file sets are synchronized; multiple goroutines
398 | // may invoke them concurrently.
399 | //
400 | // The byte offsets for each file in a file set are mapped into
401 | // distinct (integer) intervals, one interval [base, base+size]
402 | // per file. [FileSet.Base] represents the first byte in the file, and size
403 | // is the corresponding file size. A [Pos] value is a value in such
404 | // an interval. By determining the interval a [Pos] value belongs
405 | // to, the file, its file base, and thus the byte offset (position)
406 | // the [Pos] value is representing can be computed.
407 | //
408 | // When adding a new file, a file base must be provided. That can
409 | // be any integer value that is past the end of any interval of any
410 | // file already in the file set. For convenience, [FileSet.Base] provides
411 | // such a value, which is simply the end of the Pos interval of the most
412 | // recently added file, plus one. Unless there is a need to extend an
413 | // interval later, using the [FileSet.Base] should be used as argument
414 | // for [FileSet.AddFile].
415 | //
416 | // A [File] may be removed from a FileSet when it is no longer needed.
417 | // This may reduce memory usage in a long-running application.
418 | type FileSet struct {
419 | 	mutex sync.RWMutex         // protects the file set
420 | 	base  int                  // base offset for the next file
421 | 	files []*File              // list of files in the order added to the set
422 | 	last  atomic.Pointer[File] // cache of last file looked up
423 | }
424 | 
425 | // NewFileSet creates a new file set.
426 | func NewFileSet() *FileSet {
427 | 	return &FileSet{
428 | 		base: 1, // 0 == NoPos
429 | 	}
430 | }
431 | 
432 | // Base returns the minimum base offset that must be provided to
433 | // [FileSet.AddFile] when adding the next file.
434 | func (s *FileSet) Base() int {
435 | 	s.mutex.RLock()
436 | 	b := s.base
437 | 	s.mutex.RUnlock()
438 | 	return b
439 | }
440 | 
441 | // AddFile adds a new file with a given filename, base offset, and file size
442 | // to the file set s and returns the file. Multiple files may have the same
443 | // name. The base offset must not be smaller than the [FileSet.Base], and
444 | // size must not be negative. As a special case, if a negative base is provided,
445 | // the current value of the [FileSet.Base] is used instead.
446 | //
447 | // Adding the file will set the file set's [FileSet.Base] value to base + size + 1
448 | // as the minimum base value for the next file. The following relationship
449 | // exists between a [Pos] value p for a given file offset offs:
450 | //
451 | //	int(p) = base + offs
452 | //
453 | // with offs in the range [0, size] and thus p in the range [base, base+size].
454 | // For convenience, [File.Pos] may be used to create file-specific position
455 | // values from a file offset.
456 | func (s *FileSet) AddFile(filename string, base, size int) *File {
457 | 	// Allocate f outside the critical section.
458 | 	f := &File{name: filename, size: size, lines: []int{0}}
459 | 
460 | 	s.mutex.Lock()
461 | 	defer s.mutex.Unlock()
462 | 	if base < 0 {
463 | 		base = s.base
464 | 	}
465 | 	if base < s.base {
466 | 		panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base))
467 | 	}
468 | 	f.base = base
469 | 	if size < 0 {
470 | 		panic(fmt.Sprintf("invalid size %d (should be >= 0)", size))
471 | 	}
472 | 	// base >= s.base && size >= 0
473 | 	base += size + 1 // +1 because EOF also has a position
474 | 	if base < 0 {
475 | 		panic("token.Pos offset overflow (> 2G of source code in file set)")
476 | 	}
477 | 	// add the file to the file set
478 | 	s.base = base
479 | 	s.files = append(s.files, f)
480 | 	s.last.Store(f)
481 | 	return f
482 | }
483 | 
484 | // RemoveFile removes a file from the [FileSet] so that subsequent
485 | // queries for its [Pos] interval yield a negative result.
486 | // This reduces the memory usage of a long-lived [FileSet] that
487 | // encounters an unbounded stream of files.
488 | //
489 | // Removing a file that does not belong to the set has no effect.
490 | func (s *FileSet) RemoveFile(file *File) {
491 | 	s.last.CompareAndSwap(file, nil) // clear last file cache
492 | 
493 | 	s.mutex.Lock()
494 | 	defer s.mutex.Unlock()
495 | 
496 | 	if i := searchFiles(s.files, file.base); i >= 0 && s.files[i] == file {
497 | 		last := &s.files[len(s.files)-1]
498 | 		s.files = slices.Delete(s.files, i, i+1)
499 | 		*last = nil // don't prolong lifetime when popping last element
500 | 	}
501 | }
502 | 
503 | // Iterate calls f for the files in the file set in the order they were added
504 | // until f returns false.
505 | func (s *FileSet) Iterate(f func(*File) bool) {
506 | 	for i := 0; ; i++ {
507 | 		var file *File
508 | 		s.mutex.RLock()
509 | 		if i < len(s.files) {
510 | 			file = s.files[i]
511 | 		}
512 | 		s.mutex.RUnlock()
513 | 		if file == nil || !f(file) {
514 | 			break
515 | 		}
516 | 	}
517 | }
518 | 
519 | func searchFiles(a []*File, x int) int {
520 | 	i, found := slices.BinarySearchFunc(a, x, func(a *File, x int) int {
521 | 		return cmp.Compare(a.base, x)
522 | 	})
523 | 	if !found {
524 | 		// We want the File containing x, but if we didn't
525 | 		// find x then i is the next one.
526 | 		i--
527 | 	}
528 | 	return i
529 | }
530 | 
531 | func (s *FileSet) file(p Pos) *File {
532 | 	// common case: p is in last file.
533 | 	if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size {
534 | 		return f
535 | 	}
536 | 
537 | 	s.mutex.RLock()
538 | 	defer s.mutex.RUnlock()
539 | 
540 | 	// p is not in last file - search all files
541 | 	if i := searchFiles(s.files, int(p)); i >= 0 {
542 | 		f := s.files[i]
543 | 		// f.base <= int(p) by definition of searchFiles
544 | 		if int(p) <= f.base+f.size {
545 | 			// Update cache of last file. A race is ok,
546 | 			// but an exclusive lock causes heavy contention.
547 | 			s.last.Store(f)
548 | 			return f
549 | 		}
550 | 	}
551 | 	return nil
552 | }
553 | 
554 | // File returns the file that contains the position p.
555 | // If no such file is found (for instance for p == [NoPos]),
556 | // the result is nil.
557 | func (s *FileSet) File(p Pos) (f *File) {
558 | 	if p != NoPos {
559 | 		f = s.file(p)
560 | 	}
561 | 	return
562 | }
563 | 
564 | // PositionFor converts a [Pos] p in the fileset into a [Position] value.
565 | // If adjusted is set, the position may be adjusted by position-altering
566 | // //line comments; otherwise those comments are ignored.
567 | // p must be a [Pos] value in s or [NoPos].
568 | func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) {
569 | 	if p != NoPos {
570 | 		if f := s.file(p); f != nil {
571 | 			return f.position(p, adjusted)
572 | 		}
573 | 	}
574 | 	return
575 | }
576 | 
577 | // Position converts a [Pos] p in the fileset into a Position value.
578 | // Calling s.Position(p) is equivalent to calling s.PositionFor(p, true).
579 | func (s *FileSet) Position(p Pos) (pos Position) {
580 | 	return s.PositionFor(p, true)
581 | }
582 | 
583 | // -----------------------------------------------------------------------------
584 | // Helper functions
585 | 
// searchInts returns the index of the last element of the ascendingly
// sorted slice a that is <= x, or -1 if there is no such element.
func searchInts(a []int, x int) int {
	// The loop below is a manually inlined version of:
	//
	//   return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1
	//
	// Better compiler optimizations may make this unnecessary in the
	// future, but at the moment this change improves the go/printer
	// benchmark performance by ~30%. This has a direct impact on the
	// speed of gofmt and thus seems worthwhile (2011-04-29).
	// TODO(gri): Remove this when compilers have caught up.
	lo, hi := 0, len(a)
	for lo < hi {
		mid := int(uint(lo+hi) >> 1) // avoid overflow when computing mid
		// lo ≤ mid < hi
		if x < a[mid] {
			hi = mid
		} else {
			lo = mid + 1
		}
	}
	return lo - 1
}
608 | 


--------------------------------------------------------------------------------