├── internal ├── os │ └── elf │ │ ├── section.go │ │ ├── debug.go │ │ ├── prog.go │ │ ├── writer.go │ │ ├── reader.go │ │ └── file.go ├── prog │ ├── file.go │ └── error.go ├── arch │ └── arch.go ├── utils │ └── convert.go └── reader │ ├── string.go │ ├── reader.go │ ├── decimal_test.go │ ├── reader_test.go │ └── decimal.go ├── go.mod ├── .gitignore ├── compiler ├── assemble │ ├── internal │ │ ├── instr.go │ │ ├── program.go │ │ ├── operand.go │ │ ├── label.go │ │ ├── lexer.go │ │ ├── token.go │ │ └── parser.go │ └── main.go ├── compile │ ├── parser │ │ ├── parser_block.go │ │ ├── lexer.go │ │ ├── params.go │ │ ├── parser.go │ │ ├── types.go │ │ └── parser_exp.go │ ├── main.go │ ├── ast │ │ └── scope.go │ └── token │ │ ├── token.go │ │ └── file.go └── link │ └── main.go ├── go.sum ├── LICENSE ├── README.md └── docs ├── ascii.md └── 汇编语法详解.md /internal/os/elf/section.go: -------------------------------------------------------------------------------- 1 | package elf 2 | 3 | // Section 段信息 4 | type Section struct { 5 | Name string // 名称 6 | Offset int // 偏移 7 | Length int // 内容大小 8 | } 9 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/facelang/face 2 | 3 | go 1.24 4 | 5 | require ( 6 | github.com/olekukonko/tablewriter v0.0.5 7 | golang.org/x/arch v0.17.0 8 | ) 9 | 10 | require github.com/mattn/go-runewidth v0.0.9 // indirect 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .idea/ 3 | .vscode/ 4 | .DS_Store 5 | 6 | **/go.sum 7 | **/*.exe 8 | **/*.exe~ 9 | **/*.dll 10 | **/*.so 11 | **/*.dylib 12 | **/*.exe 13 | 14 | # 编译输出 15 | /bin/ 16 | /pkg/ 17 | /pkgs/ 18 | /compiler/provider/ 19 | /example/ 20 | /tools/ -------------------------------------------------------------------------------- 
// FilePos identifies a location inside a source file.
//
// String prints Line and Col incremented by one, so they are presumably
// stored zero-based (confirm against the code that fills them in). Offset is
// carried along but is not used by String.
type FilePos struct {
	Filename string // name of the file the position refers to
	Col      int    // column within the line
	Line     int    // line within the file
	Offset   int    // offset within the file
}

// String renders the position as a human-readable line/column/file triple.
func (p *FilePos) String() string {
	line, col := p.Line+1, p.Col+1
	return fmt.Sprintf("行: %d, 列: %d, 文件名:%s", line, col, p.Filename)
}
// ProgType classifies one entry of an assembled program.
type ProgType byte

// Program entry kinds. The numeric values are spelled out explicitly and
// match the positions the identifiers had in the original iota sequence.
const (
	Unknown ProgType = 0
	Instr   ProgType = 1 // instruction
	Label   ProgType = 2 // symbol definition
	Section ProgType = 3 // section marker
	Global  ProgType = 4 // global symbol
	Local   ProgType = 5 // local symbol
	Type    ProgType = 6 // .type directive: assigns a type
	Size    ProgType = 7 // .size directive: assigns a size
)

// Program is a single program entry: its kind, its name and a program
// counter value.
type Program struct {
	Type ProgType
	Name string
	Pc   int64
}
asm.Program("example/hello.s") 17 | //println("完成编译!") 18 | //file, _ := elf.ReadElf("common.o") 19 | //file.Objdump() 20 | 21 | err := link.Link("example/hello", "example/common.s.o", "example/hello.s.o") 22 | if err != nil { 23 | panic(err) 24 | } 25 | fmt.Println("链接完成!") 26 | 27 | } 28 | -------------------------------------------------------------------------------- /internal/arch/arch.go: -------------------------------------------------------------------------------- 1 | package arch 2 | 3 | // Arch wraps the link architecture object with more architecture-specific information. 4 | type Arch struct { 5 | //*obj.LinkArch 6 | // Map of instruction names to enumeration. 7 | InstrTable map[string]obj.As 8 | // Map of register names to enumeration. 9 | Register map[string]int16 10 | // Table of register prefix names. These are things like R for R(0) and SPR for SPR(268). 11 | RegisterPrefix map[string]bool 12 | // RegisterNumber converts R(10) into arm.REG_R10. 13 | RegisterNumber func(string, int16) (int16, bool) 14 | // Instruction is a jump. 
// Operand kinds stored in operand.Type.
const (
	REGISTER  = iota // register
	IMMEDIATE        // immediate value
	MEMORY           // memory reference
	SYMBOL           // symbol / label
)

// ExpType identifies the operator of an expression term.
type ExpType byte

// Expression operators.
//
// The original file declared EXP_ADD three times (a redeclaration error that
// stops the package from compiling) and gave EXP_SUB the same value as
// EXP_ADD, making the two indistinguishable. The duplicates are removed and
// EXP_SUB now has its own value.
// NOTE(review): the duplicated lines were presumably placeholders for further
// operators; add them here with distinct values once they are decided.
const (
	EXP_ADD ExpType = 1
	EXP_SUB ExpType = 2
)

// OprType is the kind tag carried by GenOpr.
type OprType byte

// Operand kinds used by GenOpr.
const (
	OPRTP_IMM OprType = 1 // immediate value
	OPRTP_REG OprType = 2 // register
	OPRTP_MEM OprType = 3 // address operand, needs addressing
	OPRTP_REL OprType = 4 // symbol operand, needs relocation
)

// Operand is a marker interface: a type opts in by declaring an unexported
// operand method.
type Operand interface {
	operand()
}

// Express is the (currently unconstrained) element type of ExpOpr.ExpList.
type Express interface {
}

// operand describes a single instruction operand.
type operand struct {
	Type  int    // operand kind (REGISTER, IMMEDIATE, MEMORY or SYMBOL)
	Value string // operand value
	Base  string // base register (memory references)
	Index string // index register (memory references)
	Scale int    // scale factor (memory references)
}

// ExpOpr is an operand made of a list of expression terms.
type ExpOpr struct {
	ExpList []Express
}

// GenOpr is a general operand.
type GenOpr struct {
	// Type is the operand kind, one of the OPRTP_* constants.
	// NOTE(review): the original comment listed "4" as the addressing kind
	// (the one that uses the ModRM field), which does not match
	// OPRTP_MEM == 3; confirm which value is intended.
	Type   OprType
	Value  int64 // immediate value or address
	Length int   // operand width
}

// RelOpr is an operand that refers to a symbol by name.
type RelOpr struct {
	Label string // symbol name
}
6 | 7 | You are free to: 8 | - Share — copy and redistribute the material in any medium or format 9 | - Adapt — remix, transform, and build upon the material for any purpose, even commercially 10 | 11 | Under the following terms: 12 | - Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use. 13 | 14 | No additional restrictions — You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits. 15 | 16 | Notices: 17 | - You do not have to comply with the license for elements of the material in the public domain or where your use is permitted by an applicable exception or limitation. 18 | - No warranties are given. The license may not give you all of the permissions necessary for your intended use. For example, other rights such as publicity, privacy, or moral rights may limit how you use the material. 
-------------------------------------------------------------------------------- /compiler/assemble/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "github.com/facelang/face/compiler/assemble/internal" 7 | "github.com/facelang/face/compiler/provider/asm" 8 | "github.com/facelang/face/internal/os/elf" 9 | "os" 10 | "path/filepath" 11 | "strings" 12 | ) 13 | 14 | var ( 15 | Debug = flag.Bool("debug", false, "启用调试模式,默认不启用") 16 | OutputFile = flag.String("o", "", "输出文件,默认跟输入文件保持一致") 17 | // todo 可以指定平台信息, 支持跨平台编译 18 | ) 19 | 20 | func Usage() { 21 | fmt.Fprintf(os.Stderr, "usage: asm [options] file.s ...\n") 22 | fmt.Fprintf(os.Stderr, "Flags:\n") 23 | flag.PrintDefaults() 24 | os.Exit(2) 25 | } 26 | 27 | func main() { 28 | if flag.NArg() == 0 { 29 | flag.Usage() 30 | } 31 | 32 | if *OutputFile == "" { 33 | if flag.NArg() != 1 { 34 | flag.Usage() 35 | } 36 | input := filepath.Base(flag.Arg(0)) 37 | input = strings.TrimSuffix(input, ".s") 38 | *OutputFile = fmt.Sprintf("%s.o", input) 39 | } 40 | 41 | for _, f := range flag.Args() { 42 | lexer := internal.NewLexer(f) 43 | parser := internal.NewParser(lexer) 44 | pList := new(obj.Plist) 45 | pList.Firstpc, ok = parser.Parse() // p.firstProg 46 | 47 | obj.Flushplist(ctxt, pList, nil) 48 | } 49 | 50 | buf, err := os.ReadFile("common.t") 51 | if err != nil { 52 | panic(err) 53 | } 54 | for _, b := range buf { 55 | fmt.Printf("%d, ", b) 56 | } 57 | _ = asm.Program("example/hello.s") 58 | println("完成编译!") 59 | file, _ := elf.ReadElf("common.o") 60 | file.Objdump() 61 | 62 | } 63 | -------------------------------------------------------------------------------- /compiler/compile/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "github.com/facelang/face/compiler/compile/internal" 7 | 
"github.com/facelang/face/compiler/compile/parser" 8 | "github.com/facelang/face/compiler/provider/asm" 9 | "github.com/facelang/face/internal/os/elf" 10 | "os" 11 | "path/filepath" 12 | "strings" 13 | ) 14 | 15 | var ( 16 | Debug = flag.Bool("debug", false, "启用调试模式,默认不启用") 17 | OutputFile = flag.String("o", "", "输出文件,默认跟输入文件保持一致") 18 | // todo 可以指定平台信息, 支持跨平台编译 19 | ) 20 | 21 | func Usage() { 22 | fmt.Fprintf(os.Stderr, "usage: asm [options] file.s ...\n") 23 | fmt.Fprintf(os.Stderr, "Flags:\n") 24 | flag.PrintDefaults() 25 | os.Exit(2) 26 | } 27 | 28 | func main() { 29 | if flag.NArg() == 0 { 30 | flag.Usage() 31 | } 32 | 33 | if *OutputFile == "" { 34 | if flag.NArg() != 1 { 35 | flag.Usage() 36 | } 37 | input := filepath.Base(flag.Arg(0)) 38 | input = strings.TrimSuffix(input, ".s") 39 | *OutputFile = fmt.Sprintf("%s.o", input) 40 | } 41 | 42 | for _, f := range flag.Args() { 43 | lexer := parser.NewLexer(f) 44 | parser := internal.NewParser(lexer) 45 | pList := new(obj.Plist) 46 | pList.Firstpc, ok = parser.Parse() // p.firstProg 47 | 48 | obj.Flushplist(ctxt, pList, nil) 49 | } 50 | 51 | buf, err := os.ReadFile("common.t") 52 | if err != nil { 53 | panic(err) 54 | } 55 | for _, b := range buf { 56 | fmt.Printf("%d, ", b) 57 | } 58 | _ = asm.Program("example/hello.s") 59 | println("完成编译!") 60 | file, _ := elf.ReadElf("common.o") 61 | file.Objdump() 62 | 63 | } 64 | -------------------------------------------------------------------------------- /internal/utils/convert.go: -------------------------------------------------------------------------------- 1 | package utils 2 | 3 | import ( 4 | "encoding/binary" 5 | "math" 6 | "strconv" 7 | ) 8 | 9 | func Float2Bytes(val float64) []byte { 10 | buf := make([]byte, 8) 11 | binary.LittleEndian.PutUint64(buf, math.Float64bits(val)) 12 | return buf 13 | } 14 | 15 | func Float(lit string) float64 { 16 | val, err := strconv.ParseFloat(lit, 64) 17 | if err != nil { 18 | panic("无效的浮点数: " + lit) 19 | } 20 | return val 21 | } 
// Int2Bytes encodes val as 8 bytes in little-endian order.
func Int2Bytes(val int64) []byte {
	buf := make([]byte, 8)
	binary.LittleEndian.PutUint64(buf, uint64(val))
	return buf
}

// Int parses an integer literal and returns its value.
//
// Supported forms:
//   - ""                 -> 0
//   - "0b…" / "0B…"      -> binary
//   - "0x…" / "0X…"      -> hexadecimal
//   - "0o…" / "0O…"      -> octal
//   - "0…" (leading 0)   -> octal
//   - anything else      -> decimal
//
// It panics with a message naming the literal when the digits are not valid
// for the selected base.
//
// The original declaration returned []byte while the body returned an int64
// and IntBytes passed the result to Int2Bytes(int64); the return type is now
// int64, mirroring Float, so the file type-checks.
func Int(lit string) int64 {
	if lit == "" {
		return 0
	}

	// Pick the base, the digits to parse and the base name used in the
	// panic message.
	base, digits, kind := 10, lit, "十进制"
	if lit[0] == '0' && len(lit) > 1 {
		switch lit[1] {
		case 'b', 'B':
			base, digits, kind = 2, lit[2:], "二进制"
		case 'x', 'X':
			base, digits, kind = 16, lit[2:], "十六进制"
		case 'o', 'O':
			base, digits, kind = 8, lit[2:], "八进制"
		default: // plain leading zero: octal, parsed with the zero included
			base, kind = 8, "八进制"
		}
	}

	val, err := strconv.ParseInt(digits, base, 64)
	if err != nil {
		panic("无效的" + kind + "数字: " + lit)
	}
	return val
}

// IntBytes parses an integer literal (see Int) and returns it as 8
// little-endian bytes.
func IntBytes(lit string) []byte {
	return Int2Bytes(Int(lit))
}
int // 重复次数 20 | Size int // 字节长度 21 | Cont []int // 内容 22 | ContLen int // 内容长度 23 | RelInfo bool // 记录重定位信息 24 | } 25 | 26 | // AddLabel 添加符号到符号表; 一共三处,equ 常量 仅数字 NewRecWithEqu, 变量 NewRecWithData, 代码段 TextLabel 27 | func (p *parser) AddLabel(name string, rec *label) { 28 | rec.Name = name // 缓存一次,减少后续查找名字 29 | if rec.Type == TEXT_LABEL || rec.Type == LOCAL_LABEL { 30 | rec.Addr = p.seg.Offset 31 | rec.Section = p.seg.Name 32 | } 33 | 34 | // 更新地址, 除了具体的变量定义,这里都是 0, 没有变化 35 | p.seg.Offset += rec.Times * rec.Size * rec.ContLen 36 | 37 | if i, ok := p.labelNames[name]; ok { 38 | labelRec := p.labelList[i] 39 | if labelRec.Type == UNDEFINED_LABEL { 40 | p.labelList[i] = rec // 直接替换 41 | } else { 42 | _ = fmt.Errorf("符号: %s 重复定义!", name) 43 | } 44 | } else { 45 | p.labelList = append(p.labelList, rec) 46 | p.labelNames[name] = len(p.labelList) - 1 47 | } 48 | } 49 | 50 | // GetLabel 获取符号 51 | func (p *parser) GetLabel(name string) *label { 52 | if i, ok := p.labelNames[name]; ok { 53 | return p.labelList[i] 54 | } 55 | 56 | // 只有符号引用符号时, 才会被创建 57 | // 未知符号,添加为外部符号(待重定位) 58 | rec := NewLabel(UNDEFINED_LABEL) 59 | rec.Name = name 60 | p.labelList = append(p.labelList, rec) 61 | p.labelNames[name] = len(p.labelList) - 1 62 | return rec 63 | } 64 | 65 | func NewLabel(lType LabelType) *label { 66 | return &label{Type: lType} 67 | } 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Facelang 2 | 3 | Facelang 是一个全新编程语言项目。该项目期望打造一门专为全栈工程师量身定制的跨端编译语言,目前主要基于 Go 实现。 4 | 5 | 下一阶段,会优先使用 LLVM 完成语言的整体功能,确保语言基本可用。 6 | 7 | 后期仍然计划参考 Go 实现更独立,更完善的汇编器和链接器,实现轻量化的编译。 8 | 9 | 该项目目前属于个人维护,大部分功能实现完成度不高,开源的目的是希望对此项目感兴趣的朋友可以一起加入学习、探讨。 10 | 11 | 项目中可能会出现诸多问题,还望大家多多包涵。 12 | 13 | 🤘 🤘 🤘 自嗨 ing ... 14 | 15 | 致歉。 16 | 17 | ## 🚪 快速上手 18 | 19 | - COME SOON ... 
20 | 21 | ## 🚀 项目特性 22 | 23 | - 完整的编译器实现 24 | - 汇编器支持 25 | - 链接器功能 26 | - 跨平台支持 27 | - 跨端GUI支持 28 | - 底层原理学习 29 | 30 | ## 🛠️ 技术栈 31 | 32 | - Go 语言 33 | - x86 汇编 34 | - Arm 汇编 35 | - llvm 36 | - C/C++ 37 | 38 | ## 📚 项目结构 39 | 40 | ``` 41 | face-lang/ 42 | ├── compiler/ # 编译器实现 43 | ├── docs/ # 文档 44 | ├── example/ # 示例代码 45 | ├── internal/ # 核心代码 46 | ├── library/ # 标准库 47 | └── tools/ # 反汇编工具 48 | ``` 49 | 50 | ## 🎯 项目目标 51 | 52 | 1. 实现一门完整的编程语言 53 | 2. 提供跨平台编译支持 54 | 3. 为全栈工程师提供高效的开发体验 55 | 4. 探索和学习程序运行的底层原理 56 | 57 | ## 🏃‍♂‍➡ 开发进度 58 | 59 | - [x] Linux 平台支持 60 | - [ ] Osx 平台支持 61 | - [ ] Windows 平台支持 62 | - [x] 基础汇编指令支持 `mov`、`cmp`、`sub`、`add`、`lea`、`call`、`int`、`imul`、`idiv`、`neg`、`inc`、`dec`、`jmp`、`je`、`jg`、`jl`、`jle`、`jne`、`jna`、`push`、`pop` 63 | - [ ] 基于 LLVM 实现 64 | - [ ] 文档完善 65 | - [ ] 其它汇编指令支持 66 | - [ ] 标准库完善 67 | - [ ] 性能优化 68 | 69 | ## 🤝 参与贡献 70 | 71 | 欢迎所有对编程语言实现感兴趣的朋友参与项目开发!您可以通过以下方式参与: 72 | 73 | 1. 提交 Issue 报告问题或建议 74 | 2. 提交 Pull Request 贡献代码 75 | 3. 完善项目文档 76 | 4. 分享使用经验 77 | 78 | ## 📝 项目笔记 79 | 80 | TODO 81 | 82 | ## 🔮 未来规划 83 | 84 | - [ ] 实现更多语言特性 85 | - [ ] 优化编译性能 86 | - [ ] 提供更多平台支持 87 | - [ ] 完善开发工具链 88 | - [ ] 建立活跃的社区 89 | 90 | ## 📄 开源协议 91 | 92 | 本项目采用 [Creative Commons Attribution 4.0 International License](LICENSE) 协议开源。 93 | 94 | 根据该协议,您可以: 95 | - 自由分享和分发本项目 96 | - 自由修改和构建本项目 97 | - 用于任何目的,包括商业用途 98 | 99 | 主要要求: 100 | - 必须注明原作者 101 | - 必须提供许可证链接 102 | - 必须说明是否做了修改 103 | 104 | ## 🌟 致谢 105 | 106 | 感谢所有为项目做出贡献的开发者! 107 | 108 | --- 109 | 110 | 欢迎关注项目,一起探索编程语言的奥秘! 
-------------------------------------------------------------------------------- /internal/reader/string.go: -------------------------------------------------------------------------------- 1 | package reader 2 | 3 | import "fmt" 4 | 5 | func Char(r *Reader) string { 6 | ident, l := String(r, '\'') 7 | if l != 1 { 8 | panic(fmt.Errorf("invalid char literal")) 9 | } 10 | return ident 11 | } 12 | 13 | func String(r *Reader, quote byte) (string, int) { 14 | length := 0 15 | ch, ok := r.ReadByte() // read character after quote 16 | for ch != quote { 17 | if ch == '\n' || !ok { 18 | panic(fmt.Errorf("literal not terminated")) 19 | } 20 | if ch == '\\' { 21 | ch = escape(r, quote) 22 | } else { 23 | ch, ok = r.ReadByte() 24 | } 25 | length++ 26 | } 27 | return r.ReadText(), length 28 | } 29 | 30 | func RawString(r *Reader) string { 31 | ch, ok := r.ReadByte() // read character after '`' 32 | for ch != '`' { 33 | if !ok { 34 | panic(fmt.Errorf("literal not terminated")) 35 | } 36 | ch, ok = r.ReadByte() 37 | } 38 | return r.ReadText() 39 | } 40 | 41 | // Comment 单行注释 42 | func Comment(r *Reader) string { 43 | ch, ok := r.ReadByte() // read character after "//" 44 | for ok && ch != '\n' { 45 | ch, ok = r.ReadByte() 46 | } 47 | r.GoBack() 48 | return r.ReadText() 49 | } 50 | 51 | // 处理转义字符 52 | func escape(r *Reader, quote byte) byte { 53 | ch, _ := r.ReadByte() // read character after '/' 54 | switch ch { 55 | case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote: 56 | // 常见的转义字符, 只需要读一个字符即可 57 | ch, _ = r.ReadByte() 58 | case '0', '1', '2', '3', '4', '5', '6', '7': 59 | // 处理形如 \123 的八进制转义序列 60 | // 最多读取 3 位八进制数字 61 | ch = number(r, ch, 8, 3) 62 | case 'x': 63 | ch, _ = r.ReadByte() 64 | ch = number(r, ch, 16, 2) 65 | case 'u': 66 | ch, _ = r.ReadByte() 67 | ch = number(r, ch, 16, 4) 68 | case 'U': 69 | ch, _ = r.ReadByte() 70 | ch = number(r, ch, 16, 8) 71 | default: 72 | panic(fmt.Errorf("invalid char escape")) 73 | } 74 | return ch 75 | } 76 | 77 | // 处理数字部分 78 | func 
number(r *Reader, ch byte, base, n int) byte { 79 | for n > 0 && digitVal(ch) < base { 80 | ch, _ = r.ReadByte() 81 | n-- 82 | } 83 | if n > 0 { 84 | panic(fmt.Errorf("invalid char escape")) 85 | } 86 | return ch 87 | } 88 | -------------------------------------------------------------------------------- /internal/os/elf/debug.go: -------------------------------------------------------------------------------- 1 | package elf 2 | 3 | import ( 4 | "strings" 5 | ) 6 | 7 | //type intName struct { 8 | // i uint64 9 | // s string 10 | //} 11 | // 12 | //func stringName(i uint64, names []intName, goSyntax bool) string { 13 | // for _, n := range names { 14 | // if n.i == i { 15 | // if goSyntax { 16 | // return "elf." + n.s 17 | // } 18 | // return n.s 19 | // } 20 | // } 21 | // 22 | // // second pass - look for smaller to add with. 23 | // // assume sorted already 24 | // for j := len(names) - 1; j >= 0; j-- { 25 | // n := names[j] 26 | // if n.i < i { 27 | // s := n.s 28 | // if goSyntax { 29 | // s = "elf." 
// StringTableName returns the NUL-terminated string that begins at offset
// start inside the string table bytes.
//
// The original indexed the table without any bounds checks and therefore
// panicked when start was outside the table or when the final entry had no
// terminating NUL. Now an out-of-range start yields "" and an unterminated
// entry yields everything up to the end of the table.
func StringTableName(bytes []byte, start uint32) string {
	if uint64(start) >= uint64(len(bytes)) {
		return ""
	}
	builder := strings.Builder{}
	for _, ch := range bytes[start:] {
		if ch == 0 {
			break
		}
		builder.WriteByte(ch)
	}
	return builder.String()
}
| 90 | return ch, chw 91 | } 92 | 93 | // TextReady 文本读取器准备就绪 94 | func (r *Reader) TextReady() { 95 | r.b = r.r 96 | } 97 | 98 | // ReadText 读取一段本文 99 | func (r *Reader) ReadText() string { 100 | defer func() { 101 | r.b = -1 // 重置游标 102 | }() 103 | 104 | return string(r.buff[r.b : r.r+r.chw]) 105 | } 106 | 107 | // FileReader todo 蔚来可能扩展支持 多种数据源读取模式,比如数据流 108 | func FileReader(file string) *Reader { 109 | r := &Reader{filename: file} 110 | 111 | buff, err := os.ReadFile(file) 112 | if err != nil { 113 | r.errorf("failed to read file: %s", err) 114 | } 115 | 116 | r.buff = buff 117 | r.e = len(r.buff) 118 | return r 119 | } 120 | 121 | func BytesReader(input []byte) *Reader { 122 | r := &Reader{filename: "#Bytes"} 123 | r.buff = input 124 | r.e = len(input) 125 | return r 126 | } 127 | -------------------------------------------------------------------------------- /internal/reader/decimal_test.go: -------------------------------------------------------------------------------- 1 | package reader 2 | 3 | import ( 4 | "github.com/facelang/face/compiler/compile/internal/tokens" 5 | "testing" 6 | ) 7 | 8 | func TestDecimal(t *testing.T) { 9 | tests := []struct { 10 | name string 11 | input string 12 | wantTok tokens.Token 13 | wantText string 14 | wantErr bool 15 | }{ 16 | // 十进制整数测试 17 | {"decimal integer", "123", tokens.INT, "123", false}, 18 | {"decimal integer with underscore", "1_2_3", tokens.INT, "123", false}, 19 | {"decimal zero", "0", tokens.INT, "0", false}, 20 | 21 | // 八进制测试 22 | {"octal with prefix 0", "0123", tokens.INT, "0123", false}, 23 | {"octal with prefix o", "0o123", tokens.INT, "0o123", false}, 24 | {"invalid octal float", "0o1.2", tokens.INT, "0o1", true}, 25 | 26 | // 十六进制测试 27 | {"hex with prefix x", "0x1A", tokens.INT, "0x1A", false}, 28 | {"hex with prefix X", "0X1a", tokens.INT, "0X1a", false}, 29 | {"hex float", "0x1.2p3", tokens.FLOAT, "0x1.2p3", false}, 30 | {"hex float with capital P", "0x1.2P3", tokens.FLOAT, "0x1.2P3", false}, 31 | 
{"hex float with negative exponent", "0x1.2p-3", tokens.FLOAT, "0x1.2p-3", false}, 32 | 33 | // 二进制测试 34 | {"binary with prefix b", "0b1010", tokens.INT, "0b1010", false}, 35 | {"binary with prefix B", "0B1010", tokens.INT, "0B1010", false}, 36 | {"invalid binary float", "0b1.01", tokens.INT, "0b1", true}, 37 | 38 | // 十进制浮点数测试 39 | {"decimal float", "123.456", tokens.FLOAT, "123.456", false}, 40 | {"decimal float with exponent", "123.456e10", tokens.FLOAT, "123.456e10", false}, 41 | {"decimal float with capital E", "123.456E10", tokens.FLOAT, "123.456E10", false}, 42 | {"decimal float with negative exponent", "123.456e-10", tokens.FLOAT, "123.456e-10", false}, 43 | {"decimal float with positive exponent", "123.456e+10", tokens.FLOAT, "123.456e+10", false}, 44 | 45 | // 错误情况测试 46 | {"invalid hex exponent", "0x1.2e3", tokens.FLOAT, "0x1", true}, 47 | {"invalid decimal exponent", "0o1.2e3", tokens.FLOAT, "0o1", true}, 48 | {"hex float without exponent", "0x1.2", tokens.FLOAT, "0x1", true}, 49 | {"no digits", "0x", tokens.INT, "0x", true}, 50 | } 51 | 52 | for _, tt := range tests { 53 | t.Run(tt.name, func(t *testing.T) { 54 | // 创建 reader 55 | r := BytesReader([]byte(tt.input)) 56 | first, _ := r.ReadByte() 57 | 58 | defer func() { 59 | if r := recover(); r != nil { 60 | if !tt.wantErr { 61 | t.Errorf("Decimal() unexpected panic: %v", r) 62 | } 63 | } 64 | }() 65 | 66 | gotTok, gotText := Decimal(r, first) 67 | 68 | if !tt.wantErr { 69 | if gotTok != tt.wantTok { 70 | t.Errorf("Decimal() got token = %v, want %v", gotTok, tt.wantTok) 71 | } 72 | if gotText != tt.wantText { 73 | t.Errorf("Decimal() got text = %v, want %v", gotText, tt.wantText) 74 | } 75 | } 76 | }) 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /internal/os/elf/prog.go: -------------------------------------------------------------------------------- 1 | package elf 2 | 3 | import "encoding/binary" 4 | 5 | // Block 表示一个数据块 6 | type Block struct { 7 
| Data []byte 8 | Offset uint32 9 | Size uint32 10 | } 11 | 12 | // ProgSeg 表示段的列表, 还有两个方法: allocAddr, relocAddr 13 | type ProgSeg struct { 14 | Name string // 段名称 15 | BaseAddr uint32 // 分配基地址 16 | Offset uint32 // 合并后的文件偏移 17 | Size uint32 // 合并后大小 18 | Begin uint32 // 对齐前开始位置偏移 19 | OwnerList []*File // 拥有该段的文件序列 20 | Blocks []*Block // 记录合并后的数据块序列 21 | } 22 | 23 | // AllocAddr 分配地址空间 base 是基址, off 是偏移 24 | func (s *ProgSeg) AllocAddr(name string, base *uint32, off *uint32) { 25 | s.Begin = *off //记录对齐前偏移 26 | 27 | // 虚拟地址对齐,让所有的段按照4KB字节对齐 28 | if name != ".bss" { 29 | *base += (MemAlign - *base%MemAlign) % MemAlign 30 | } 31 | 32 | // 偏移地址对齐,让一般段按照4字节对齐,文本段按照16字节对齐 33 | align := uint32(DiscAlign) 34 | if name == ".text" { 35 | align = 16 36 | } 37 | *off += (align - *off%align) % align 38 | // 这里 off 的偏移和 base 的偏移不同 39 | 40 | // 使虚址和偏移按照4KB模同余 41 | *base = *base - *base%MemAlign + *off%MemAlign // todo 有些看不懂了 42 | 43 | // 累加地址和偏移 44 | s.BaseAddr = *base 45 | s.Offset = *off 46 | s.Size = 0 47 | for _, file := range s.OwnerList { // 拥有该段的所有文件,合并数据 48 | s.Size += (DiscAlign - s.Size%DiscAlign) % DiscAlign // 对齐每个小段,按照4字节,数据靠后 49 | seg := file.ShdrTab[name] 50 | //读取需要合并段的数据 51 | if name != ".bss" { 52 | buf := file.ReadData(seg.Offset, seg.Size) 53 | block := &Block{ 54 | Data: buf, 55 | Offset: s.Size, // 数据靠前靠后是否有区别? 
// fileWriter buffers binary-encoded values in memory and writes them to the
// target file in one step when Flush is called.
type fileWriter struct {
	name  string           // target file name
	w     *bytes.Buffer    // in-memory output buffer
	err   error            // first encoding error, kept until Flush
	order binary.ByteOrder // byte order used for encoding
}

// FileWriter is the handle returned by NewWriter.
type FileWriter = *fileWriter

// Write encodes data with the writer's byte order and appends it to the
// buffer.
//
// The first encoding error is remembered: every later Write is a no-op that
// returns it, and Flush returns it as well. The original never stored the
// error and cleared the field after each call, so a caller that ignores
// individual Write results and checks only Flush could never see a failure.
func (f *fileWriter) Write(data any) error {
	if f.err != nil {
		return f.err
	}
	f.err = binary.Write(f.w, f.order, data)
	return f.err
}

// Flush writes the buffered bytes to the target file, creating it or
// truncating an existing one. If an earlier Write failed, that error is
// returned and the file is not touched.
func (f *fileWriter) Flush() error {
	if f.err != nil {
		return f.err
	}
	// os.WriteFile also closes the file; the original os.Create handle was
	// never closed.
	return os.WriteFile(f.name, f.w.Bytes(), 0o666)
}

// NewWriter returns a writer that buffers output for file and encodes values
// using order.
func NewWriter(file string, order binary.ByteOrder) FileWriter {
	return &fileWriter{name: file, w: new(bytes.Buffer), order: order}
}
107 | for _, sh := range file.ShdrNames { 108 | _ = w.Write(file.ShdrTab[sh]) 109 | } 110 | 111 | // 符号表 112 | for _, sym := range file.SymNames { 113 | _ = w.Write(file.SymTab[sym]) 114 | } 115 | 116 | // 字符串表 117 | _ = w.Write(file.Strtab) 118 | 119 | // 重定位表 120 | for _, rel := range file.RelTab { 121 | _ = w.Write(rel.Rel) 122 | } 123 | 124 | return w.Flush() // 最后一部再写入文件 125 | } 126 | -------------------------------------------------------------------------------- /internal/prog/error.go: -------------------------------------------------------------------------------- 1 | package prog 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "sort" 7 | ) 8 | 9 | type Error struct { 10 | Pos FilePos 11 | Message string 12 | } 13 | 14 | // Error implements the error interface. 15 | func (e Error) Error() string { 16 | return fmt.Sprintf("%s:\n\t_>%s", e.Pos.String(), e.Message) 17 | } 18 | 19 | type ErrorList []*Error 20 | 21 | // Add adds an [Error] with given position and error message to an [ErrorList]. 22 | func (p *ErrorList) Add(pos FilePos, msg string) { 23 | *p = append(*p, &Error{pos, msg}) 24 | } 25 | 26 | // Reset resets an [ErrorList] to no errors. 27 | func (p *ErrorList) Reset() { *p = (*p)[0:0] } 28 | 29 | // [ErrorList] implements the sort Interface. 30 | func (p ErrorList) Len() int { return len(p) } 31 | func (p ErrorList) Swap(i, j int) { p[i], p[j] = p[j], p[i] } 32 | 33 | func (p ErrorList) Less(i, j int) bool { 34 | e := &p[i].Pos 35 | f := &p[j].Pos 36 | // Note that it is not sufficient to simply compare file offsets because 37 | // the offsets do not reflect modified line information (through //line 38 | // comments). 39 | if e.Filename != f.Filename { 40 | return e.Filename < f.Filename 41 | } 42 | if e.Line != f.Line { 43 | return e.Line < f.Line 44 | } 45 | if e.Col != f.Col { 46 | return e.Col < f.Col 47 | } 48 | return p[i].Message < p[j].Message 49 | } 50 | 51 | // Sort sorts an [ErrorList]. 
*[Error] entries are sorted by position, 52 | // other errors are sorted by error message, and before any *[Error] 53 | // entry. 54 | func (p ErrorList) Sort() { 55 | sort.Sort(p) 56 | } 57 | 58 | // RemoveMultiples sorts an [ErrorList] and removes all but the first error per line. 59 | func (p *ErrorList) RemoveMultiples() { 60 | sort.Sort(p) 61 | var last FilePos // initial last.Line is != any legal error line 62 | i := 0 63 | for _, e := range *p { 64 | if e.Pos.Filename != last.Filename || e.Pos.Line != last.Line { 65 | last = e.Pos 66 | (*p)[i] = e 67 | i++ 68 | } 69 | } 70 | *p = (*p)[0:i] 71 | } 72 | 73 | // An [ErrorList] implements the error interface. 74 | func (p ErrorList) Error() string { 75 | switch len(p) { 76 | case 0: 77 | return "no errors" 78 | case 1: 79 | return p[0].Error() 80 | } 81 | return fmt.Sprintf("%s (and %d more errors)", p[0], len(p)-1) 82 | } 83 | 84 | // Err returns an error equivalent to this error list. 85 | // If the list is empty, Err returns nil. 86 | func (p ErrorList) Err() error { 87 | if len(p) == 0 { 88 | return nil 89 | } 90 | return p 91 | } 92 | 93 | // PrintError is a utility function that prints a list of errors to w, 94 | // one error per line, if the err parameter is an [ErrorList]. Otherwise 95 | // it prints the err string. 
96 | func PrintError(w io.Writer, err error) { 97 | if list, ok := err.(ErrorList); ok { 98 | for _, e := range list { 99 | fmt.Fprintf(w, "%s\n", e) 100 | } 101 | } else if err != nil { 102 | fmt.Fprintf(w, "%s\n", err) 103 | } 104 | } 105 | 106 | type ErrorFunc func(info *FilePos, msg string) 107 | -------------------------------------------------------------------------------- /internal/reader/reader_test.go: -------------------------------------------------------------------------------- 1 | package reader 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | "unicode/utf8" 7 | ) 8 | 9 | func TestReadRune(t *testing.T) { 10 | // 创建测试文件,包含各种 UTF-8 字符 11 | testContent := "Hello 世界\n你好 🌍\nTest 测试" 12 | testFile := "test_utf8.txt" 13 | 14 | // 写入测试文件 15 | err := os.WriteFile(testFile, []byte(testContent), 0644) 16 | if err != nil { 17 | t.Fatalf("Failed to create test file: %v", err) 18 | } 19 | defer os.Remove(testFile) 20 | 21 | // 创建 Reader 22 | reader := FileReader(testFile) 23 | 24 | // 读取所有字符并验证 25 | var result []rune 26 | for { 27 | ch, chw := reader.ReadRune() 28 | if chw == 0 { 29 | break 30 | } 31 | t.Logf("Read rune: %c (%U)", ch, ch) 32 | result = append(result, ch) 33 | } 34 | 35 | // 验证读取的字符是否正确 36 | expected := []rune(testContent) 37 | t.Logf("Got content: %q", result) 38 | t.Logf("Expected length: %d, Got length: %d", len(expected), len(result)) 39 | 40 | if len(result) != len(expected) { 41 | t.Fatalf("Length mismatch: got %d, expected %d", len(result), len(expected)) 42 | } 43 | 44 | for i, r := range result { 45 | if r != expected[i] { 46 | t.Errorf("Character mismatch at position %d: got %c (%U), expected %c (%U)", 47 | i, r, r, expected[i], expected[i]) 48 | } 49 | } 50 | } 51 | 52 | func TestReadRuneASCII(t *testing.T) { 53 | // 测试纯 ASCII 字符 54 | testContent := "Hello World\nTest 123" 55 | testFile := "test_ascii.txt" 56 | 57 | err := os.WriteFile(testFile, []byte(testContent), 0644) 58 | if err != nil { 59 | t.Fatalf("Failed to create test file: %v", 
err) 60 | } 61 | defer os.Remove(testFile) 62 | 63 | reader := FileReader(testFile) 64 | 65 | var result []rune 66 | for { 67 | r, chw := reader.ReadRune() 68 | if chw == 0 { 69 | break 70 | } 71 | result = append(result, r) 72 | } 73 | 74 | expected := []rune(testContent) 75 | if len(result) != len(expected) { 76 | t.Fatalf("Length mismatch: got %d, expected %d", len(result), len(expected)) 77 | } 78 | 79 | for i, r := range result { 80 | if r != expected[i] { 81 | t.Errorf("Character mismatch at position %d: got %c, expected %c", i, r, expected[i]) 82 | } 83 | } 84 | } 85 | 86 | func TestReadRuneEmoji(t *testing.T) { 87 | // 测试包含 emoji 的文本 88 | testContent := "Hello 🌍 World 🚀" 89 | testFile := "test_emoji.txt" 90 | 91 | err := os.WriteFile(testFile, []byte(testContent), 0644) 92 | if err != nil { 93 | t.Fatalf("Failed to create test file: %v", err) 94 | } 95 | defer os.Remove(testFile) 96 | 97 | reader := FileReader(testFile) 98 | 99 | var result []rune 100 | for { 101 | r, chw := reader.ReadRune() 102 | if chw == 0 { 103 | break 104 | } 105 | result = append(result, r) 106 | } 107 | 108 | expected := []rune(testContent) 109 | if len(result) != len(expected) { 110 | t.Fatalf("Length mismatch: got %d, expected %d", len(result), len(expected)) 111 | } 112 | 113 | for i, r := range result { 114 | if r != expected[i] { 115 | t.Errorf("Character mismatch at position %d: got %c (%U), expected %c (%U)", 116 | i, r, r, expected[i], expected[i]) 117 | } 118 | } 119 | } 120 | 121 | func TestReadRuneInvalidUTF8(t *testing.T) { 122 | // 测试无效的 UTF-8 序列 123 | invalidUTF8 := []byte{0xFF, 0xFE, 0xFD} // 无效的 UTF-8 字节序列 124 | testFile := "test_invalid.txt" 125 | 126 | err := os.WriteFile(testFile, invalidUTF8, 0644) 127 | if err != nil { 128 | t.Fatalf("Failed to create test file: %v", err) 129 | } 130 | defer os.Remove(testFile) 131 | 132 | reader := FileReader(testFile) 133 | 134 | // 读取第一个字符,应该返回 RuneError 135 | r, chw := reader.ReadRune() 136 | if r != utf8.RuneError { 137 | 
t.Errorf("Expected RuneError for invalid UTF-8, got %c (%U)", r, r) 138 | } 139 | if chw != 1 { 140 | t.Error("Expected width 1 for invalid UTF-8") 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /internal/reader/decimal.go: -------------------------------------------------------------------------------- 1 | package reader 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | type NumberType int8 9 | 10 | const INT_TYPE NumberType = 1 11 | const FLOAT_TYPE NumberType = 2 12 | 13 | // Number 这是一个数字的解析器, 读取结束,最后一个字符不是有效数字, 可能是其它符号,所以需要退回最后一个 14 | func Number(r *Reader, first rune) (NumberType, string) { 15 | defer func() { 16 | r.GoBack() // 最后一个符号需要回退 17 | }() 18 | 19 | base := 10 // 数字基数 20 | prefix := byte(0) // 前缀:0(十进制), '0'(八进制), 'x'(十六进制), 'o'(八进制), 'b'(二进制) 21 | flags := byte(0) // 位标志:bit 0: 有数字, bit 1: 有下划线, bit 2 符号异常 22 | 23 | // 整数部分 24 | ds := byte(0) 25 | ch := byte(first) 26 | tok := INT_TYPE 27 | 28 | if first == '0' { 29 | ch, _ = r.ReadByte() 30 | switch ch { 31 | case '.': // 小数 32 | tok = FLOAT_TYPE 33 | case 'x', 'X': 34 | ch, _ = r.ReadByte() 35 | base, prefix = 16, 'x' 36 | case 'o', 'O': 37 | ch, _ = r.ReadByte() 38 | base, prefix = 8, 'o' 39 | case 'b', 'B': 40 | ch, _ = r.ReadByte() 41 | base, prefix = 2, 'b' 42 | default: 43 | base, prefix = 8, '0' 44 | flags = 1 // 前导0, 或者 只为 0 45 | } 46 | } else if first == '.' { 47 | tok = FLOAT_TYPE 48 | } else { 49 | flags = 1 // 前导数 50 | } 51 | 52 | // 整数和16进制支持小数表达, 先读取整数部分 53 | // 123.456 和 0x1.2p3 都是合法的 54 | if tok == INT_TYPE || prefix != 'x' { 55 | ch, ds = digits(r, ch, base) // 解析所有数字和下划线 56 | flags |= ds // ds 的值为 01 表示有数字,10 表示有下划线 57 | if ch == '.' { 58 | if flags&1 == 0 { // 0x. 
是非法的 59 | panic(fmt.Errorf("%s has no digits", decimalName(prefix))) 60 | } 61 | tok = FLOAT_TYPE 62 | } 63 | } 64 | 65 | // 非十进制,或者小数 (小数点后的数字或其它进制) 66 | if tok == FLOAT_TYPE || prefix != 0 { 67 | ch, ds = digits(r, ch, base) // 解析所有数字和下划线 68 | flags |= ds // ds 的值为 01 表示有数字,10 表示有下划线 69 | if flags&1 == 0 { // 没有读取到数字 70 | panic(fmt.Errorf("%s has no digits", decimalName(prefix))) 71 | } 72 | } 73 | 74 | // 指数部分(e/E 用于十进制,p/P 用于十六进制) 75 | if e := ch; e == 'e' || e == 'E' || e == 'p' || e == 'P' { 76 | if (e == 'e' || e == 'E') && prefix != 0 { 77 | panic(fmt.Errorf("%q exponent requires decimal mantissa", ch)) 78 | } 79 | if (e == 'p' || e == 'P') && prefix != 'x' { 80 | panic(fmt.Errorf("%q exponent requires hexadecimal mantissa", ch)) 81 | } 82 | 83 | ch, _ = r.ReadByte() 84 | tok = FLOAT_TYPE 85 | if ch == '+' || ch == '-' { 86 | ch, _ = r.ReadByte() 87 | } 88 | 89 | _, ds = digits(r, ch, 10) // 指数后面的值, 只能十进制 90 | flags |= ds 91 | 92 | if ds&1 == 0 { // 指数后面没有数字 93 | panic(fmt.Errorf("exponent has no digits")) 94 | } 95 | } 96 | 97 | if flags&2 == 0 { 98 | return tok, r.ReadText() 99 | } 100 | 101 | // 数字中有 _ 需要踢掉 102 | return tok, strings.ReplaceAll(r.ReadText(), "_", "") 103 | } 104 | 105 | // 辅助函数:解析数字序列 106 | func digits(r *Reader, ch byte, base int) (byte, byte) { 107 | ds := byte(0) // 位标志:bit 0: 有数字, bit 1: 有下划线 bit 3: 异常 108 | for { 109 | if ch == '.' 
{ // 不是小数点,直接跳出循环 110 | break 111 | } 112 | if ch == '_' { 113 | ds |= 2 // 记录下划线 114 | ch, _ = r.ReadByte() 115 | continue 116 | } 117 | d := digitVal(ch) // 获取字符的数值 118 | if d >= base { // 如果数值大于等于基数 119 | ds |= 4 // 记录异常 120 | break // 跳出循环 121 | } 122 | ds |= 1 // 记录数字 123 | ch, _ = r.ReadByte() // 读取下一个字符 124 | } 125 | return ch, ds 126 | } 127 | 128 | // 辅助函数:获取数字值 129 | func digitVal(ch byte) int { 130 | switch { 131 | case '0' <= ch && ch <= '9': 132 | return int(ch - '0') 133 | case 'a' <= ch && ch <= 'z': 134 | return int(ch - 'a' + 10) 135 | case 'A' <= ch && ch <= 'Z': 136 | return int(ch - 'A' + 10) 137 | } 138 | return 36 // 大于任何有效数字 139 | } 140 | 141 | // 辅助函数:获取数字字面量名称 142 | func decimalName(prefix byte) string { 143 | switch prefix { 144 | case 'x': 145 | return "hexadecimal" 146 | case 'o': 147 | return "octal" 148 | case 'b': 149 | return "binary" 150 | case '0': 151 | return "octal" 152 | default: 153 | return "decimal" 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /compiler/assemble/internal/lexer.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "github.com/facelang/face/internal/reader" 5 | "unicode" 6 | "unicode/utf8" 7 | ) 8 | 9 | // Whitespace 对比 map, switch 位掩码 比较效率最高, 忽略 \n 10 | const Whitespace = 1<<'\t' | 1<<'\n' | 1<<'\r' | 1<<' ' 11 | 12 | //type lexer struct { 13 | // reader *reader.Reader 14 | // token tokens2.Token 15 | // ident string 16 | //} 17 | // 18 | //func (lex *lexer) NextToken() tokens2.Token { 19 | // ch, chw := lex.reader.ReadRune() 20 | // if chw == 0 { 21 | // return tokens2.EOF 22 | // } 23 | // 24 | // // skip white space 25 | // for Whitespace&(1< 0 // 第一个字符必须是字母或下划线 73 | //} 74 | // 75 | //func GetDecimal(lex *lexer, ch rune) tokens2.Token { 76 | // token, val := reader.Decimal(lex.reader, ch) 77 | // lex.ident = val 78 | // return token 79 | //} 80 | 81 | type lexer struct { 82 | 
*reader.Reader // 读取器 83 | id string // 暂存字符 84 | pos int // 文件读取指针行列号 85 | back bool // 回退标识 86 | backToken Token // 回退Token 87 | } 88 | 89 | func (lex *lexer) Back(token Token) { 90 | lex.back = true 91 | lex.backToken = token 92 | } 93 | 94 | func (lex *lexer) NextToken() Token { 95 | defer func() { 96 | lex.back = false 97 | }() 98 | 99 | // 如果有回退,先获取回退 100 | if lex.back { 101 | return lex.backToken 102 | } 103 | 104 | ch, chw := lex.ReadRune() 105 | if chw == 0 { 106 | return EOF 107 | } 108 | 109 | lex.pos = lex.Pos() 110 | 111 | // skip white space 112 | for Whitespace&(1< utf8.RuneSelf || unicode.IsDigit(ch) && i > 0 // 第一个字符必须是字母或下划线 163 | } 164 | 165 | func Number(lex *lexer, ch rune) Token { 166 | typ, val := reader.Number(lex.Reader, ch) 167 | lex.id = val 168 | 169 | if typ == reader.INT_TYPE { 170 | return INT 171 | } 172 | return FLOAT 173 | } 174 | 175 | func NewLexer(file string) *lexer { // 封装后的读取器 176 | return &lexer{Reader: reader.FileReader(file)} 177 | } 178 | -------------------------------------------------------------------------------- /compiler/compile/parser/lexer.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "github.com/facelang/face/compiler/compile/token" 5 | "github.com/facelang/face/internal/reader" 6 | "unicode" 7 | "unicode/utf8" 8 | ) 9 | 10 | // Whitespace 对比 map, switch 位掩码 比较效率最高, 忽略 \n 11 | const Whitespace = 1<<'\t' | 1<<'\r' | 1<<' ' 12 | 13 | type lexer struct { 14 | *reader.Reader // 读取器 15 | pos token.Pos // 位置信息 16 | identifier string // 标识符 17 | } 18 | 19 | //type lexer struct { 20 | // buffer *buffer // 读取器 21 | // content string // 暂存字符 22 | // col, line, offset int // 文件读取指针行列号 23 | // back bool // 回退标识 24 | // backToken Token // 回退Token 25 | //} 26 | 27 | //func (l *lexer) init(file string, errFunc ErrorFunc) error { 28 | // defer func() { next, _ := lex.ReadByte() }() 29 | // return l.buffer.init(file, errFunc) 30 | //} 31 | // 32 | 
//func (l *lexer) Back(token Token) { 33 | // l.back = true 34 | // l.backToken = token 35 | //} 36 | 37 | // NextToken todo 需要处理分号,和换行符, 还需要处理:分支语句中,必须是分号,其它情况可以是换行符或者分号 38 | func (lex *lexer) NextToken() token.Token { 39 | //defer func() { 40 | // l.back = false 41 | //}() 42 | // 43 | //// 如果有回退,先获取回退 44 | //if l.back { 45 | // return l.backToken 46 | //} 47 | 48 | ch, chw := lex.ReadRune() 49 | if chw == 0 { 50 | return token.EOF 51 | } 52 | 53 | lex.pos = token.Pos(lex.Pos()) 54 | 55 | // skip white space 56 | for Whitespace&(1<': 99 | next, _ := lex.ReadByte() 100 | if next == '=' { 101 | return token.GEQ 102 | } else if next == '>' { 103 | return token.SHR 104 | } else { 105 | lex.GoBack() 106 | return token.GTR 107 | } 108 | case '<': 109 | next, _ := lex.ReadByte() 110 | if next == '=' { 111 | return token.LEQ 112 | } else if next == '>' { 113 | return token.SHL 114 | } else { 115 | lex.GoBack() 116 | return token.LSS 117 | } 118 | case '=': 119 | next, _ := lex.ReadByte() 120 | if next == '=' { 121 | return token.EQL 122 | } 123 | lex.GoBack() 124 | return token.ASSIGN 125 | case '!': 126 | next, _ := lex.ReadByte() 127 | if next == '=' { 128 | return token.NEQ 129 | } 130 | lex.GoBack() 131 | return token.NOT 132 | case ';': 133 | return token.SEMICOLON 134 | case ',': 135 | return token.COMMA 136 | case '"': // 查找字符串,到 " 结束, 最后一个字符是 ", 所以不需要回退 137 | ident, _ := reader.String(lex.Reader, '"') 138 | lex.identifier = ident 139 | return token.STRING 140 | case '\'': // 读一个字符, 字符串读, \' 结尾, 不需要回退 141 | lex.identifier = reader.Char(lex.Reader) 142 | return token.CHAR 143 | case '`': // todo 多行文本,需要进一步处理为一般字符串 144 | lex.identifier = reader.RawString(lex.Reader) 145 | return token.STRING 146 | case '(': 147 | return token.LPAREN 148 | case ')': 149 | return token.RPAREN 150 | case '{': 151 | return token.LBRACE 152 | case '}': 153 | return token.RBRACE 154 | default: 155 | return token.ILLEGAL 156 | } 157 | } 158 | 159 | func CheckIdent(ch rune, i int) bool { 
160 | return ch == '.' || ch == '_' || unicode.IsLetter(ch) || 161 | ch > utf8.RuneSelf || unicode.IsDigit(ch) && i > 0 // 第一个字符必须是字母或下划线 162 | } 163 | 164 | func Number(lex *lexer, ch rune) token.Token { 165 | typ, val := reader.Number(lex.Reader, ch) 166 | lex.identifier = val 167 | 168 | if typ == reader.INT_TYPE { 169 | return token.INT 170 | } 171 | return token.FLOAT 172 | } 173 | 174 | func NewLexer(file string) *lexer { // 封装后的读取器 175 | return &lexer{Reader: reader.FileReader(file)} 176 | } 177 | -------------------------------------------------------------------------------- /compiler/compile/ast/scope.go: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // This file implements scopes and the objects they contain. 6 | 7 | package ast 8 | 9 | import ( 10 | "fmt" 11 | "go/token" 12 | "strings" 13 | ) 14 | 15 | // A Scope maintains the set of named language entities declared 16 | // in the scope and a link to the immediately surrounding (outer) 17 | // scope. 18 | // 19 | // Deprecated: use the type checker [go/types] instead; see [Object]. 20 | type Scope struct { 21 | Outer *Scope 22 | Objects map[string]*Object 23 | } 24 | 25 | // NewScope creates a new scope nested in the outer scope. 26 | func NewScope(outer *Scope) *Scope { 27 | const n = 4 // initial scope capacity 28 | return &Scope{outer, make(map[string]*Object, n)} 29 | } 30 | 31 | // Lookup returns the object with the given name if it is 32 | // found in scope s, otherwise it returns nil. Outer scopes 33 | // are ignored. 34 | func (s *Scope) Lookup(name string) *Object { 35 | return s.Objects[name] 36 | } 37 | 38 | // Insert attempts to insert a named object obj into the scope s. 
39 | // If the scope already contains an object alt with the same name, 40 | // Insert leaves the scope unchanged and returns alt. Otherwise 41 | // it inserts obj and returns nil. 42 | func (s *Scope) Insert(obj *Object) (alt *Object) { 43 | if alt = s.Objects[obj.Name]; alt == nil { 44 | s.Objects[obj.Name] = obj 45 | } 46 | return 47 | } 48 | 49 | // Debugging support 50 | func (s *Scope) String() string { 51 | var buf strings.Builder 52 | fmt.Fprintf(&buf, "scope %p {", s) 53 | if s != nil && len(s.Objects) > 0 { 54 | fmt.Fprintln(&buf) 55 | for _, obj := range s.Objects { 56 | fmt.Fprintf(&buf, "\t%s %s\n", obj.Kind, obj.Name) 57 | } 58 | } 59 | fmt.Fprintf(&buf, "}\n") 60 | return buf.String() 61 | } 62 | 63 | // ---------------------------------------------------------------------------- 64 | // Objects 65 | 66 | // An Object describes a named language entity such as a package, 67 | // constant, type, variable, function (incl. methods), or label. 68 | // 69 | // The Data fields contains object-specific data: 70 | // 71 | // Kind Data type Data value 72 | // Pkg *Scope package scope 73 | // Con int iota for the respective declaration 74 | // 75 | // Deprecated: The relationship between Idents and Objects cannot be 76 | // correctly computed without type information. For example, the 77 | // expression T{K: 0} may denote a struct, map, slice, or array 78 | // literal, depending on the type of T. If T is a struct, then K 79 | // refers to a field of T, whereas for the other types it refers to a 80 | // value in the environment. 81 | // 82 | // New programs should set the [parser.SkipObjectResolution] parser 83 | // flag to disable syntactic object resolution (which also saves CPU 84 | // and memory), and instead use the type checker [go/types] if object 85 | // resolution is desired. See the Defs, Uses, and Implicits fields of 86 | // the [types.Info] struct for details. 
87 | type Object struct { 88 | Kind ObjKind 89 | Name string // declared name 90 | Decl any // corresponding Field, XxxSpec, FuncDecl, LabeledStmt, AssignStmt, Scope; or nil 91 | Data any // object-specific data; or nil 92 | Type any // placeholder for type information; may be nil 93 | } 94 | 95 | // NewObj creates a new object of a given kind and name. 96 | func NewObj(kind ObjKind, name string) *Object { 97 | return &Object{Kind: kind, Name: name} 98 | } 99 | 100 | // Pos computes the source position of the declaration of an object name. 101 | // The result may be an invalid position if it cannot be computed 102 | // (obj.Decl may be nil or not correct). 103 | func (obj *Object) Ofsset() token.Pos { 104 | name := obj.Name 105 | switch d := obj.Decl.(type) { 106 | case *Field: 107 | for _, n := range d.Names { 108 | if n.Name == name { 109 | return n.Ofsset() 110 | } 111 | } 112 | case *ImportSpec: 113 | if d.Name != nil && d.Name.Name == name { 114 | return d.Name.Ofsset() 115 | } 116 | return d.Path.Ofsset() 117 | case *ValueSpec: 118 | for _, n := range d.Names { 119 | if n.Name == name { 120 | return n.Ofsset() 121 | } 122 | } 123 | case *TypeSpec: 124 | if d.Name.Name == name { 125 | return d.Name.Ofsset() 126 | } 127 | case *FuncDecl: 128 | if d.Name.Name == name { 129 | return d.Name.Ofsset() 130 | } 131 | case *LabeledStmt: 132 | if d.Label.Name == name { 133 | return d.Label.Ofsset() 134 | } 135 | case *AssignStmt: 136 | for _, x := range d.Lhs { 137 | if ident, isIdent := x.(*Ident); isIdent && ident.Name == name { 138 | return ident.Ofsset() 139 | } 140 | } 141 | case *Scope: 142 | // predeclared object - nothing to do for now 143 | } 144 | return token.NoPos 145 | } 146 | 147 | // ObjKind describes what an [Object] represents. 148 | type ObjKind int 149 | 150 | // The list of possible [Object] kinds. 
151 | const ( 152 | Bad ObjKind = iota // for error handling 153 | Pkg // package 154 | Con // constant 155 | Typ // type 156 | Var // variable 157 | Fun // function or method 158 | Lbl // label 159 | ) 160 | 161 | var objKindStrings = [...]string{ 162 | Bad: "bad", 163 | Pkg: "package", 164 | Con: "const", 165 | Typ: "type", 166 | Var: "var", 167 | Fun: "func", 168 | Lbl: "label", 169 | } 170 | 171 | func (kind ObjKind) String() string { return objKindStrings[kind] } 172 | -------------------------------------------------------------------------------- /internal/os/elf/reader.go: -------------------------------------------------------------------------------- 1 | package elf 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "io" 7 | "os" 8 | "strings" 9 | ) 10 | 11 | type bytesReader struct { 12 | buf []byte // 字节数组 13 | r, e int // 读取器游标 14 | order binary.ByteOrder // 读取器 15 | } 16 | 17 | type BytesReader = *bytesReader 18 | 19 | func (r *bytesReader) Byte() byte { 20 | defer func() { 21 | r.r += 1 22 | }() 23 | return r.buf[r.r] 24 | } 25 | 26 | func (r *bytesReader) Uint16() uint16 { 27 | defer func() { 28 | r.r += 2 29 | }() 30 | return r.order.Uint16(r.buf[r.r : r.r+2]) 31 | } 32 | 33 | func (r *bytesReader) Uint32() uint32 { 34 | defer func() { 35 | r.r += 4 36 | }() 37 | return r.order.Uint32(r.buf[r.r : r.r+4]) 38 | } 39 | 40 | func (r *bytesReader) Uint64() uint64 { 41 | defer func() { 42 | r.r += 8 43 | }() 44 | return r.order.Uint64(r.buf[r.r : r.r+8]) 45 | } 46 | 47 | func (r *bytesReader) UintAuto(bits int) uint64 { 48 | if bits == 1 { 49 | return uint64(r.Uint32()) 50 | } else if bits == 2 { 51 | return r.Uint64() 52 | } 53 | panic("不支持的系统位数!") 54 | } 55 | 56 | func (r *bytesReader) Offset(index int) { 57 | r.r = index 58 | } 59 | 60 | func (r *bytesReader) Data(begin, length int) []byte { 61 | //if begin+length > r.e { 62 | // return nil, io.EOF 63 | //} 64 | return r.buf[begin : begin+length] 65 | } 66 | 67 | func (r *bytesReader) Party(begin, 
length int) BytesReader { 68 | //if begin+length > r.e { 69 | // return nil, io.EOF 70 | //} 71 | return NewReader(r.buf[begin:begin+length], r.order) 72 | } 73 | 74 | func NewReader(data []byte, reader binary.ByteOrder) BytesReader { 75 | return &bytesReader{ 76 | buf: data, 77 | r: 0, 78 | e: len(data), 79 | order: reader, 80 | } 81 | } 82 | 83 | // ObjectRead 直接读取对象 84 | func ObjectRead[T any](r BytesReader) (*T, error) { 85 | ret := new(T) 86 | defer func() { 87 | r.r += binary.Size(*ret) 88 | }() 89 | err := binary.Read(bytes.NewReader(r.buf[r.r:]), r.order, ret) 90 | return ret, err 91 | } 92 | 93 | // ReadElf 打开 ELF 文件, 需要记录端序 94 | func ReadElf(file string) (*File, error) { 95 | elf := &File{Name: file} 96 | f, err := os.Open(file) 97 | d, err := io.ReadAll(f) 98 | println(d) 99 | os.Chmod(file, 0666) 100 | data, err := os.ReadFile(file) 101 | if err != nil { 102 | return nil, err 103 | } 104 | magic := Elf_Magic(data[:EI_NIDENT]) 105 | 106 | reader := NewReader(data, magic.Endian()) 107 | elf.Reader = reader 108 | 109 | elf.Ehdr, err = ObjectRead[Elf32_Ehdr](reader) // 前16位 magic 也读 110 | if err != nil { 111 | return nil, err 112 | } 113 | 114 | // ------------------------------------------- 115 | // 先解析段表字符串信息 116 | // ------------------------------------------- 117 | offset := int(elf.Ehdr.Shoff) 118 | shentsize := int(elf.Ehdr.Shentsize) 119 | off := offset + int(elf.Ehdr.Shstrndx)*shentsize 120 | next := reader.Party(off, shentsize) // 这里需要解析为指定数据结构 121 | // 这个是表头, 记录字符串信息的 122 | shstrtab, err := ObjectRead[Elf32_Shdr](next) 123 | if err != nil { 124 | return nil, err 125 | } 126 | shstrTabData := reader.Data(int(shstrtab.Offset), int(shstrtab.Size)) 127 | elf.Shstrtab = shstrTabData 128 | elf.ShstrtabSize = int(shstrtab.Size) 129 | 130 | // ------------------------------------------- 131 | // 解析段表 132 | // ------------------------------------------- 133 | // 读取完整段表 134 | shdrTab := make(map[string]*Elf32_Shdr, int(elf.Ehdr.Shnum)) 135 | shdrNames := 
make([]string, int(elf.Ehdr.Shnum)) 136 | for index := 0; index < int(elf.Ehdr.Shnum); index++ { 137 | begin := offset + index*shentsize 138 | next = reader.Party(begin, shentsize) 139 | shdr, err := ObjectRead[Elf32_Shdr](next) 140 | if err != nil { 141 | return nil, err 142 | } 143 | name := StringTableName(shstrTabData, shdr.Name) 144 | shdrTab[name] = shdr 145 | shdrNames[index] = name 146 | //if name == "" { //删除空段表项 147 | // shdrTab[name] = nil 148 | //} else { 149 | // shdrTab[name] = shdr 150 | //} 151 | } 152 | elf.ShdrTab = shdrTab 153 | elf.ShdrNames = shdrNames 154 | 155 | strTab := shdrTab[".strtab"] 156 | strTabData := reader.Data(int(strTab.Offset), int(strTab.Size)) 157 | elf.Strtab = strTabData 158 | elf.StrtabSize = int(strTab.Size) 159 | 160 | symTab := shdrTab[".symtab"] 161 | symTabSize := 16 // todo 这个表达式不正确 2 ^ int(symTab.Entsize) // 16 162 | symTabLen := int(symTab.Size) / symTabSize // ➗ 16 163 | symTabList := make(map[string]*Elf32_Sym, symTabLen) 164 | symNames := make([]string, symTabLen) 165 | for i := 0; i < symTabLen; i++ { 166 | begin := int(symTab.Offset) + i*symTabSize 167 | next = reader.Party(begin, symTabSize) 168 | sym, err := ObjectRead[Elf32_Sym](next) 169 | if err != nil { 170 | return nil, err 171 | } 172 | name := StringTableName(strTabData, sym.Name) 173 | symNames[i] = name 174 | symTabList[name] = sym 175 | //if name == "" { //无名符号,对于链接没有意义,按照链接器设计需要记录全局和局部符号,避免名字冲突 176 | // symTabList[name] = nil 177 | //} else { 178 | // symTabList[name] = sym //加入符号表 179 | //} 180 | } 181 | elf.SymTab = symTabList 182 | elf.SymNames = symNames 183 | 184 | elf.RelTab = make([]*Elf32_RelInfo, 0) 185 | for name, relTab := range shdrTab { //所有段的重定位项整合 186 | if strings.HasPrefix(name, ".rel") { // 重定位段 187 | relTabLen := int(relTab.Size) / 8 188 | for i := 0; i < relTabLen; i++ { 189 | begin := int(relTab.Offset) + i*8 190 | next = reader.Party(begin, 8) 191 | rel, err := ObjectRead[Elf32_Rel](next) 192 | if err != nil { 193 | return nil, 
err 194 | } 195 | sym := symNames[int(rel.Info>>8)] 196 | relName := StringTableName(strTabData, symTabList[sym].Name) 197 | elf.RelTab = append(elf.RelTab, &Elf32_RelInfo{ 198 | SegName: name[4:], 199 | Rel: rel, 200 | RelName: relName, 201 | }) 202 | } 203 | } 204 | } 205 | 206 | return elf, nil 207 | } 208 | -------------------------------------------------------------------------------- /compiler/assemble/internal/token.go: -------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | ) 7 | 8 | type Token int 9 | 10 | const ( 11 | ILLEGAL Token = iota // 无效标记 12 | EOF // 文件结束标记 13 | COMMENT // 文档注释符 14 | 15 | _literal // 字面量开始标记 16 | IDENT // main 17 | INT // 整数类型 18 | FLOAT // 浮点数 19 | STRING // 字符串 20 | _literalEnd // 字面量结束标记 21 | 22 | _operator // 运算符 23 | ADD // + 24 | SUB // - 25 | LBRACK // [ 26 | COMMA // , 27 | RBRACK // ] 28 | COLON // : 29 | _operatorEnd // 操作符结束 30 | 31 | _keywords 32 | BREAK // 1 33 | CHAR // 2 34 | CONTINUE // 3 35 | 36 | ELSE // 4 37 | EXTERN // 5 38 | 39 | IF // 5 40 | IN // 6 41 | INT // 7 42 | OUT // 8 43 | RETURN // 9 44 | VOID // 11 45 | WHILE // 12 46 | _keywordsEnd 47 | 48 | // 寄存器 49 | BR_AL 50 | BR_CL 51 | BR_DL 52 | BR_BL 53 | BR_AH 54 | BR_CH 55 | BR_DH 56 | BR_BH 57 | DR_EAX 58 | DR_ECX 59 | DR_EDX 60 | DR_EBX 61 | DR_ESP 62 | DR_EBP 63 | DR_ESI 64 | DR_EDI 65 | // 双操作数指令 66 | I_MOV 67 | I_CMP 68 | I_SUB 69 | I_ADD 70 | I_LEA 71 | // 单操作数指令 72 | I_CALL 73 | I_INT 74 | I_IMUL 75 | I_IDIV 76 | I_NEG 77 | I_INC 78 | I_DEC 79 | I_JMP 80 | I_JE 81 | I_JG 82 | I_JL 83 | I_JGE 84 | I_JLE 85 | I_JNE 86 | I_JNA 87 | I_PUSH 88 | I_POP 89 | // 零操作数指令 90 | I_RET 91 | // 汇编指令 92 | K_SEC 93 | K_GLB 94 | K_EQU 95 | K_TIMES 96 | K_DB 97 | K_DW 98 | K_DD 99 | 100 | // 数据段定义相关的token 101 | K_BYTE = iota + _literal + 1 // .byte 102 | K_WORD 103 | K_LONG 104 | K_QUAD 105 | K_ASCII 106 | K_ASCIZ 107 | K_STRING 108 | K_REPT 109 | K_ENDR 110 | 111 | // 
段定义相关的token 112 | K_DATA = iota + _literal + 1 // .data 113 | K_TEXT // .text 114 | K_BSS // .bss 115 | K_SECTION // .section 116 | K_GLOBAL // .global 117 | K_LOCAL // .local 118 | K_ALIGN // .align 119 | K_SKIP // .skip 120 | K_SPACE // .space 121 | ) 122 | 123 | var tokens = [...]string{ 124 | ILLEGAL: "ILLEGAL", 125 | EOF: "EOF", 126 | COMMENT: "COMMENT", 127 | IDENT: "IDENT", 128 | INT: "INT", 129 | FLOAT: "FLOAT", 130 | STRING: "STRING", 131 | ADD: "+", 132 | SUB: "-", 133 | LBRACK: "[", 134 | COMMA: ",", 135 | RBRACK: "]", 136 | COLON: ":", 137 | BREAK: "break", 138 | CHAR: "char", 139 | CONTINUE: "continue", 140 | ELSE: "else", 141 | EXTERN: "extern", 142 | IF: "if", 143 | IN: "in", 144 | INT: "int", 145 | OUT: "out", 146 | RETURN: "return", 147 | VOID: "void", 148 | WHILE: "while", 149 | 150 | //TILDE: "~", 151 | } 152 | 153 | var tokenNames = map[Token]string{ 154 | ILLEGAL: "ILLEGAL", 155 | EOF: "EOF", 156 | COMMENT: "COMMENT", 157 | IDENT: "IDENT", 158 | INT: "INT", 159 | FLOAT: "FLOAT", 160 | STRING: "STRING", 161 | ADD: "+", 162 | SUB: "-", 163 | LBRACK: "[", 164 | COMMA: ",", 165 | RBRACK: "]", 166 | COLON: ":", 167 | BREAK: "break", 168 | CHAR: "char", 169 | CONTINUE: "continue", 170 | ELSE: "else", 171 | EXTERN: "extern", 172 | IF: "if", 173 | IN: "in", 174 | INT: "int", 175 | OUT: "out", 176 | RETURN: "return", 177 | VOID: "void", 178 | WHILE: "while", 179 | 180 | //TILDE: "~", 181 | K_BYTE: ".byte", 182 | K_WORD: ".word", 183 | K_LONG: ".long", 184 | K_QUAD: ".quad", 185 | K_QUAD: ".float", 186 | K_QUAD: ".double", 187 | K_ASCII: ".ascii", 188 | K_ASCIZ: ".asciz", 189 | K_STRING: ".string", 190 | K_REPT: ".rept", 191 | K_ENDR: ".endr", 192 | K_DATA: ".data", 193 | K_TEXT: ".text", 194 | K_BSS: ".bss", 195 | K_SECTION: ".section", 196 | K_GLOBAL: ".global", 197 | K_LOCAL: ".local", 198 | K_ALIGN: ".align", 199 | K_SKIP: ".skip", 200 | K_SPACE: ".space", 201 | } 202 | 203 | func (tok Token) String() string { 204 | s := "" 205 | if 0 <= tok && tok < 
Token(len(tokens)) { 206 | s = tokens[tok] 207 | } 208 | if s == "" { 209 | s = "token(" + strconv.Itoa(int(tok)) + ")" 210 | } 211 | return s 212 | } 213 | 214 | func (tok Token) Message(id string) string { 215 | if tok == INT || tok == FLOAT { 216 | return fmt.Sprintf("number:%s", id) 217 | } else if tok == STRING { 218 | return fmt.Sprintf("string:%s", id) 219 | } else if tok == IDENT { 220 | return id 221 | } 222 | return tok.String() 223 | } 224 | 225 | var keywordsList = []string{ 226 | "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", 227 | "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", 228 | "mov", "cmp", "sub", "add", "lea", 229 | "call", "int", "imul", "idiv", "neg", "inc", "dec", "jmp", "je", "jg", "jl", "jge", "jle", "jne", "jna", "push", "pop", 230 | "ret", 231 | "section", "global", "equ", "times", "db", "dw", "dd", 232 | "text", "data", "bss", // 添加段名 233 | } 234 | var keywordsTable = []Token{ 235 | BR_AL, BR_CL, BR_DL, BR_BL, BR_AH, BR_CH, BR_DH, BR_BH, 236 | DR_EAX, DR_ECX, DR_EDX, DR_EBX, DR_ESP, DR_EBP, DR_ESI, DR_EDI, 237 | I_MOV, I_CMP, I_SUB, I_ADD, I_LEA, 238 | I_CALL, I_INT, I_IMUL, I_IDIV, I_NEG, I_INC, I_DEC, I_JMP, I_JE, I_JG, I_JL, I_JGE, I_JLE, I_JNE, I_JNA, I_PUSH, I_POP, 239 | I_RET, 240 | K_SEC, K_GLB, K_EQU, K_TIMES, K_DB, K_DW, K_DD, 241 | IDENT, IDENT, IDENT, // 段名作为标识符处理 242 | } 243 | 244 | func Keywords(ident string) (Token, bool) { 245 | for i, k := range keywordsList { 246 | if k == ident { 247 | return keywordsTable[i], true 248 | } 249 | } 250 | return ILLEGAL, false 251 | } 252 | 253 | func Lookup(ident string) Token { 254 | for i, k := range keywordsList { 255 | if k == ident { 256 | return keywordsTable[i] 257 | } 258 | } 259 | return IDENT 260 | } 261 | 262 | func (tok Token) IsLiteral() bool { return _literal < tok && tok < _literalEnd } 263 | -------------------------------------------------------------------------------- /compiler/compile/parser/params.go: 
-------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "github.com/facelang/face/compiler/compile/tokens" 5 | "github.com/facelang/face/internal/prog" 6 | ) 7 | 8 | // const a int = 1 // byte, int8, int16, int32, int64, uint8, uint16, uint32, uint64, bool, string, 9 | // let b array = [1,2,3] 10 | // let b map = {1: "a", 2: "b", 3: "c"} 11 | // let c classA = {a: 1, b: 2, c: 3} 12 | 13 | // ParamOrNil = [ IdentifierList ] [ "..." ] Type . 只在参数列表中调用 follow=close 14 | func ParamOrNil(p *parser, name *prog.Name, follow token.Token) *prog.Field { 15 | 16 | pos := p.FilePos 17 | if name != nil { 18 | pos = name.Pos() 19 | } 20 | 21 | f := new(prog.Field) 22 | f.pos = pos 23 | 24 | if p.token == token.IDENT || name != nil { 25 | // name 26 | if name == nil { 27 | name = p.name() 28 | } 29 | 30 | if p.tok == _Dot { // name.*** 31 | // name "." ... 32 | f.Type = p.qualifiedName(name) 33 | if typeSetsOk && p.tok == _Operator && p.op == Or { 34 | // name "." name "|" ... 35 | f = p.embeddedElem(f) 36 | } 37 | return f 38 | } 39 | 40 | if typeSetsOk && p.tok == _Operator && p.op == Or { 41 | // name "|" ... 42 | f.Type = name 43 | return p.embeddedElem(f) 44 | } 45 | 46 | f.Name = name 47 | } 48 | 49 | if p.token == prog.DotsType { 50 | // [name] "..." ... 51 | t := new(DotsType) 52 | t.pos = p.pos() 53 | p.next() 54 | t.Elem = p.typeOrNil() 55 | if t.Elem == nil { 56 | t.Elem = p.badExpr() 57 | p.syntaxError("... is missing type") 58 | } 59 | f.Type = t 60 | return f 61 | } 62 | 63 | if typeSetsOk && p.tok == _Operator && p.op == Tilde { 64 | // [name] "~" ... 
65 | f.Type = p.embeddedElem(nil).Type 66 | return f 67 | } 68 | 69 | f.Type = p.typeOrNil() 70 | if typeSetsOk && p.tok == _Operator && p.op == Or && f.Type != nil { 71 | // [name] type "|" 72 | f = p.embeddedElem(f) 73 | } 74 | if f.Name != nil || f.Type != nil { 75 | return f 76 | } 77 | 78 | p.syntaxError("expected " + tokstring(follow)) 79 | p.advance(_Comma, follow) 80 | return nil 81 | } 82 | 83 | // Parameters = "(" [ ParameterList [ "," ] ] ")" . 84 | // ParameterList = ParameterDecl { "," ParameterDecl } . 85 | // "(" or "[" has already been consumed. 86 | // If name != nil, it is the first name after "(" or "[". 87 | // If typ != nil, name must be != nil, and (name, typ) is the first field in the list. 88 | // In the result list, either all fields have a name, or no field has a name. 89 | 90 | // p.paramList(nil, nil, _Rbrack, true) 91 | func paramList(p *parser, close token.Token, requireNames bool) (list []*Field) { 92 | 93 | var named int // number of parameters that have an explicit name and type 94 | var typed int // number of parameters that have an explicit type 95 | end := p.list("parameter list", COMMA, close, func() bool { 96 | var par *prog.Field 97 | f := ParamOrNil(p) 98 | 99 | name = nil // 1st name was consumed if present 100 | typ = nil // 1st type was consumed if present 101 | if par != nil { 102 | if debug && par.Name == nil && par.Type == nil { 103 | panic("parameter without name or type") 104 | } 105 | if par.Name != nil && par.Type != nil { 106 | named++ 107 | } 108 | if par.Type != nil { 109 | typed++ 110 | } 111 | list = append(list, par) 112 | } 113 | return false 114 | }) 115 | 116 | if len(list) == 0 { 117 | return 118 | } 119 | 120 | // distribute parameter types (len(list) > 0) 121 | if named == 0 && !requireNames { 122 | // all unnamed and we're not in a type parameter list => found names are named types 123 | for _, par := range list { 124 | if typ := par.Name; typ != nil { 125 | par.Type = typ 126 | par.Name = nil 127 | } 
128 | } 129 | } else if named != len(list) { 130 | // some named or we're in a type parameter list => all must be named 131 | var errPos Pos // left-most error position (or unknown) 132 | var typ Expr // current type (from right to left) 133 | for i := len(list) - 1; i >= 0; i-- { 134 | par := list[i] 135 | if par.Type != nil { 136 | typ = par.Type 137 | if par.Name == nil { 138 | errPos = StartPos(typ) 139 | par.Name = NewName(errPos, "_") 140 | } 141 | } else if typ != nil { 142 | par.Type = typ 143 | } else { 144 | // par.Type == nil && typ == nil => we only have a par.Name 145 | errPos = par.Name.Pos() 146 | t := p.badExpr() 147 | t.pos = errPos // correct position 148 | par.Type = t 149 | } 150 | } 151 | if errPos.IsKnown() { 152 | // Not all parameters are named because named != len(list). 153 | // If named == typed, there must be parameters that have no types. 154 | // They must be at the end of the parameter list, otherwise types 155 | // would have been filled in by the right-to-left sweep above and 156 | // there would be no error. 157 | // If requireNames is set, the parameter list is a type parameter 158 | // list. 
159 | var msg string 160 | if named == typed { 161 | errPos = end // position error at closing token ) or ] 162 | if requireNames { 163 | msg = "missing type constraint" 164 | } else { 165 | msg = "missing parameter type" 166 | } 167 | } else { 168 | if requireNames { 169 | msg = "missing type parameter name" 170 | // go.dev/issue/60812 171 | if len(list) == 1 { 172 | msg += " or invalid array length" 173 | } 174 | } else { 175 | msg = "missing parameter name" 176 | } 177 | } 178 | p.syntaxErrorAt(errPos, msg) 179 | } 180 | } 181 | 182 | return 183 | } 184 | 185 | func (p *parser) list(context string, sep, close token.Token, f func() bool) prog.FilePos { 186 | done := false 187 | 188 | for p.token != token.EOF && p.token != close && !done { 189 | done = f() 190 | 191 | if !p.got(sep) && p.token != close { 192 | p.errorf("list for %s; missing %s or %s", context, sep, close) 193 | return p.FilePos 194 | } 195 | } 196 | 197 | pos := p.FilePos 198 | p.expect(close) 199 | return pos 200 | } 201 | -------------------------------------------------------------------------------- /docs/ascii.md: -------------------------------------------------------------------------------- 1 | ### ASCII 码表 2 | 3 | | 十进制 | 十六进制 | 字符 | 描述 | 4 | |------|----------|------|-----------------------------| 5 | | 0 | 0x00 | NUL | Null | 6 | | 1 | 0x01 | SOH | Start of Header | 7 | | 2 | 0x02 | STX | Start of Text | 8 | | 3 | 0x03 | ETX | End of Text | 9 | | 4 | 0x04 | EOT | End of Transmission | 10 | | 5 | 0x05 | ENQ | Enquiry | 11 | | 6 | 0x06 | ACK | Acknowledge | 12 | | 7 | 0x07 | BEL | Bell | 13 | | 8 | 0x08 | BS | Backspace | 14 | | 9 | 0x09 | TAB | Horizontal Tab | 15 | | 10 | 0x0A | LF | Line Feed (New Line) | 16 | | 11 | 0x0B | VT | Vertical Tab | 17 | | 12 | 0x0C | FF | Form Feed (New Page) | 18 | | 13 | 0x0D | CR | Carriage Return | 19 | | 14 | 0x0E | SO | Shift Out | 20 | | 15 | 0x0F | SI | Shift In | 21 | | 16 | 0x10 | DLE | Data Link Escape | 22 | | 17 | 0x11 | DC1 | Device Control 1 
(XON) | 23 | | 18 | 0x12 | DC2 | Device Control 2 | 24 | | 19 | 0x13 | DC3 | Device Control 3 (XOFF) | 25 | | 20 | 0x14 | DC4 | Device Control 4 | 26 | | 21 | 0x15 | NAK | Negative Acknowledge | 27 | | 22 | 0x16 | SYN | Synchronous Idle | 28 | | 23 | 0x17 | ETB | End of Transmit Block | 29 | | 24 | 0x18 | CAN | Cancel | 30 | | 25 | 0x19 | EM | End of Medium | 31 | | 26 | 0x1A | SUB | Substitute | 32 | | 27 | 0x1B | ESC | Escape | 33 | | 28 | 0x1C | FS | File Separator | 34 | | 29 | 0x1D | GS | Group Separator | 35 | | 30 | 0x1E | RS | Record Separator | 36 | | 31 | 0x1F | US | Unit Separator | 37 | | 32 | 0x20 | SP | Space | 38 | | 33 | 0x21 | ! | Exclamation Mark | 39 | | 34 | 0x22 | " | Double Quote | 40 | | 35 | 0x23 | # | Number Sign (Hash) | 41 | | 36 | 0x24 | $ | Dollar Sign | 42 | | 37 | 0x25 | % | Percent Sign | 43 | | 38 | 0x26 | & | Ampersand | 44 | | 39 | 0x27 | ' | Single Quote | 45 | | 40 | 0x28 | ( | Left Parenthesis | 46 | | 41 | 0x29 | ) | Right Parenthesis | 47 | | 42 | 0x2A | * | Asterisk | 48 | | 43 | 0x2B | + | Plus Sign | 49 | | 44 | 0x2C | , | Comma | 50 | | 45 | 0x2D | - | Hyphen-Minus | 51 | | 46 | 0x2E | . | Period (Dot) | 52 | | 47 | 0x2F | / | Slash (Forward Slash) | 53 | | 48 | 0x30 | 0 | Digit 0 | 54 | | 49 | 0x31 | 1 | Digit 1 | 55 | | 50 | 0x32 | 2 | Digit 2 | 56 | | 51 | 0x33 | 3 | Digit 3 | 57 | | 52 | 0x34 | 4 | Digit 4 | 58 | | 53 | 0x35 | 5 | Digit 5 | 59 | | 54 | 0x36 | 6 | Digit 6 | 60 | | 55 | 0x37 | 7 | Digit 7 | 61 | | 56 | 0x38 | 8 | Digit 8 | 62 | | 57 | 0x39 | 9 | Digit 9 | 63 | | 58 | 0x3A | : | Colon | 64 | | 59 | 0x3B | ; | Semicolon | 65 | | 60 | 0x3C | < | Less-Than Sign | 66 | | 61 | 0x3D | = | Equals Sign | 67 | | 62 | 0x3E | > | Greater-Than Sign | 68 | | 63 | 0x3F | ? 
| Question Mark | 69 | | 64 | 0x40 | @ | At Sign | 70 | | 65 | 0x41 | A | Uppercase A | 71 | | 66 | 0x42 | B | Uppercase B | 72 | | 67 | 0x43 | C | Uppercase C | 73 | | 68 | 0x44 | D | Uppercase D | 74 | | 69 | 0x45 | E | Uppercase E | 75 | | 70 | 0x46 | F | Uppercase F | 76 | | 71 | 0x47 | G | Uppercase G | 77 | | 72 | 0x48 | H | Uppercase H | 78 | | 73 | 0x49 | I | Uppercase I | 79 | | 74 | 0x4A | J | Uppercase J | 80 | | 75 | 0x4B | K | Uppercase K | 81 | | 76 | 0x4C | L | Uppercase L | 82 | | 77 | 0x4D | M | Uppercase M | 83 | | 78 | 0x4E | N | Uppercase N | 84 | | 79 | 0x4F | O | Uppercase O | 85 | | 80 | 0x50 | P | Uppercase P | 86 | | 81 | 0x51 | Q | Uppercase Q | 87 | | 82 | 0x52 | R | Uppercase R | 88 | | 83 | 0x53 | S | Uppercase S | 89 | | 84 | 0x54 | T | Uppercase T | 90 | | 85 | 0x55 | U | Uppercase U | 91 | | 86 | 0x56 | V | Uppercase V | 92 | | 87 | 0x57 | W | Uppercase W | 93 | | 88 | 0x58 | X | Uppercase X | 94 | | 89 | 0x59 | Y | Uppercase Y | 95 | | 90 | 0x5A | Z | Uppercase Z | 96 | | 91 | 0x5B | [ | Left Square Bracket | 97 | | 92 | 0x5C | \ | Backslash | 98 | | 93 | 0x5D | ] | Right Square Bracket | 99 | | 94 | 0x5E | ^ | Caret (Circumflex Accent) | 100 | | 95 | 0x5F | _ | Underscore | 101 | | 96 | 0x60 | ` | Grave Accent | 102 | | 97 | 0x61 | a | Lowercase a | 103 | | 98 | 0x62 | b | Lowercase b | 104 | | 99 | 0x63 | c | Lowercase c | 105 | | 100 | 0x64 | d | Lowercase d | 106 | | 101 | 0x65 | e | Lowercase e | 107 | | 102 | 0x66 | f | Lowercase f | 108 | | 103 | 0x67 | g | Lowercase g | 109 | | 104 | 0x68 | h | Lowercase h | 110 | | 105 | 0x69 | i | Lowercase i | 111 | | 106 | 0x6A | j | Lowercase j | 112 | | 107 | 0x6B | k | Lowercase k | 113 | | 108 | 0x6C | l | Lowercase l | 114 | | 109 | 0x6D | m | Lowercase m | 115 | | 110 | 0x6E | n | Lowercase n | 116 | | 111 | 0x6F | o | Lowercase o | 117 | | 112 | 0x70 | p | Lowercase p | 118 | | 113 | 0x71 | q | Lowercase q | 119 | | 114 | 0x72 | r | Lowercase r | 120 | | 115 | 0x73 | s | Lowercase 
s | 121 | | 116 | 0x74 | t | Lowercase t | 122 | | 117 | 0x75 | u | Lowercase u | 123 | | 118 | 0x76 | v | Lowercase v | 124 | | 119 | 0x77 | w | Lowercase w | 125 | | 120 | 0x78 | x | Lowercase x | 126 | | 121 | 0x79 | y | Lowercase y | 127 | | 122 | 0x7A | z | Lowercase z | 128 | | 123 | 0x7B | { | Left Curly Brace | 129 | | 124 | 0x7C | \| | Vertical Bar | 130 | | 125 | 0x7D | } | Right Curly Brace | 131 | | 126 | 0x7E | ~ | Tilde | 132 | | 127 | 0x7F | DEL | Delete | 133 | 134 | 这张表包括了 ASCII 字符集中的所有标准字符,从控制字符到可打印字符。 -------------------------------------------------------------------------------- /compiler/compile/token/token.go: -------------------------------------------------------------------------------- 1 | // Copyright 2009 The Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package token defines constants representing the lexical tokens of the Go 6 | // programming language and basic operations on tokens (printing, predicates). 7 | package token 8 | 9 | import ( 10 | "strconv" 11 | "unicode" 12 | "unicode/utf8" 13 | ) 14 | 15 | // Token is the set of lexical tokens of the Go programming language. 16 | type Token int 17 | 18 | // The list of tokens. 
// The unexported *_beg / *_end constants are range markers only; they have
// no printable form and are compared against by the Is* predicates.
const (
	// Special tokens
	ILLEGAL Token = iota
	EOF
	COMMENT
	NEWLINE

	literal_beg
	// Identifiers and basic type literals
	// (these tokens stand for classes of literals)
	IDENT  // main
	INT    // 12345
	FLOAT  // 123.45
	IMAG   // 123.45i
	CHAR   // 'a', a UTF-8 character
	STRING // "abc"
	literal_end

	operator_beg
	// Operators and delimiters
	ADD // +
	SUB // -
	MUL // *
	QUO // /
	REM // %

	AND     // &
	OR      // |
	XOR     // ^
	SHL     // <<
	SHR     // >>
	AND_NOT // &^

	ADD_ASSIGN // +=
	SUB_ASSIGN // -=
	MUL_ASSIGN // *=
	QUO_ASSIGN // /=
	REM_ASSIGN // %=

	AND_ASSIGN     // &=
	OR_ASSIGN      // |=
	XOR_ASSIGN     // ^=
	SHL_ASSIGN     // <<=
	SHR_ASSIGN     // >>=
	AND_NOT_ASSIGN // &^=

	LAND  // &&
	LOR   // ||
	ARROW // <-
	INC   // ++
	DEC   // --

	EQL    // ==
	LSS    // <
	GTR    // >
	ASSIGN // =
	NOT    // !

	NEQ      // !=
	LEQ      // <=
	GEQ      // >=
	DEFINE   // :=
	ELLIPSIS // ...

	LPAREN // (
	LBRACK // [
	LBRACE // {
	COMMA  // ,
	PERIOD // .

	RPAREN    // )
	RBRACK    // ]
	RBRACE    // }
	SEMICOLON // ;
	COLON     // :
	operator_end

	keyword_beg
	// Keywords
	BREAK
	CASE
	CHAN
	CONST
	CONTINUE

	DEFAULT
	DEFER
	ELSE
	FALLTHROUGH
	FOR

	FUNC
	GO
	GOTO
	IF
	IMPORT
	FROM

	INTERFACE
	MAP
	PACKAGE
	RANGE
	RETURN

	SELECT
	STRUCT
	SWITCH
	TYPE
	LET
	keyword_end

	additional_beg
	// additional tokens, handled in an ad-hoc manner
	TILDE
	additional_end
)

// tokens maps every printable Token to its textual form. Range markers
// (literal_beg, keyword_end, ...) have no entry, so their slot holds the
// empty string and String falls back to the "token(N)" spelling for them.
var tokens = [...]string{
	ILLEGAL: "ILLEGAL",

	EOF:     "EOF",
	COMMENT: "COMMENT",
	NEWLINE: "NEWLINE",

	IDENT:  "IDENT",
	INT:    "INT",
	FLOAT:  "FLOAT",
	IMAG:   "IMAG",
	CHAR:   "CHAR",
	STRING: "STRING",

	ADD: "+",
	SUB: "-",
	MUL: "*",
	QUO: "/",
	REM: "%",

	AND:     "&",
	OR:      "|",
	XOR:     "^",
	SHL:     "<<",
	SHR:     ">>",
	AND_NOT: "&^",

	ADD_ASSIGN: "+=",
	SUB_ASSIGN: "-=",
	MUL_ASSIGN: "*=",
	QUO_ASSIGN: "/=",
	REM_ASSIGN: "%=",

	AND_ASSIGN:     "&=",
	OR_ASSIGN:      "|=",
	XOR_ASSIGN:     "^=",
	SHL_ASSIGN:     "<<=",
	SHR_ASSIGN:     ">>=",
	AND_NOT_ASSIGN: "&^=",

	LAND:  "&&",
	LOR:   "||",
	ARROW: "<-",
	INC:   "++",
	DEC:   "--",

	EQL:    "==",
	LSS:    "<",
	GTR:    ">",
	ASSIGN: "=",
	NOT:    "!",

	NEQ:      "!=",
	LEQ:      "<=",
	GEQ:      ">=",
	DEFINE:   ":=",
	ELLIPSIS: "...",

	LPAREN: "(",
	LBRACK: "[",
	LBRACE: "{",
	COMMA:  ",",
	PERIOD: ".",

	RPAREN:    ")",
	RBRACK:    "]",
	RBRACE:    "}",
	SEMICOLON: ";",
	COLON:     ":",

	BREAK:    "break",
	CASE:     "case",
	CHAN:     "chan",
	CONST:    "const",
	CONTINUE: "continue",

	DEFAULT:     "default",
	DEFER:       "defer",
	ELSE:        "else",
	FALLTHROUGH: "fallthrough",
	FOR:         "for",

	FUNC:   "func",
	GO:     "go",
	GOTO:   "goto",
	IF:     "if",
	IMPORT: "import",
	FROM:   "from",

	INTERFACE: "interface",
	MAP:       "map",
	PACKAGE:   "package",
	RANGE:     "range",
	RETURN:    "return",

	SELECT: "select",
	STRUCT: "struct",
	SWITCH: "switch",
	TYPE:   "type",
	LET:    "let",

	TILDE: "~",
}

// String returns the string corresponding to the token tok.
// For operators, delimiters, and keywords the string is the actual
// token character sequence (e.g., for the token [ADD], the string is
// "+"). For all other tokens the string corresponds to the token
// constant name (e.g. for the token [IDENT], the string is "IDENT").
// Values outside the table, and range markers without an entry, are
// rendered as "token(N)".
func (tok Token) String() string {
	s := ""
	if 0 <= tok && tok < Token(len(tokens)) {
		s = tokens[tok]
	}
	if s == "" {
		s = "token(" + strconv.Itoa(int(tok)) + ")"
	}
	return s
}

// A set of constants for precedence-based expression parsing.
// Non-operators have lowest precedence, followed by operators
// starting with precedence 1 up to unary operators. The highest
// precedence serves as "catch-all" precedence for selector,
// indexing, and other operator and delimiter tokens.
const (
	LowestPrec  = 0 // non-operators
	UnaryPrec   = 6
	HighestPrec = 7
)

// Precedence returns the operator precedence of the binary
// operator op. If op is not a binary operator, the result
// is LowestPrec.
270 | func (op Token) Precedence() int { 271 | switch op { 272 | case LOR: // || 273 | return 1 274 | case LAND: 275 | return 2 // && 276 | case EQL, NEQ, LSS, LEQ, GTR, GEQ: 277 | return 3 // 逻辑运算 278 | case ADD, SUB, OR, XOR: 279 | return 4 // +-|^ 280 | case MUL, QUO, REM, SHL, SHR, AND, AND_NOT: 281 | return 5 // */% << >> & &^ 282 | } 283 | return LowestPrec 284 | } 285 | 286 | var keywords map[string]Token 287 | 288 | func init() { 289 | keywords = make(map[string]Token, keyword_end-(keyword_beg+1)) 290 | for i := keyword_beg + 1; i < keyword_end; i++ { 291 | keywords[tokens[i]] = i 292 | } 293 | } 294 | 295 | // Lookup maps an identifier to its keyword token or [IDENT] (if not a keyword). 296 | func Lookup(ident string) Token { 297 | if tok, is_keyword := keywords[ident]; is_keyword { 298 | return tok 299 | } 300 | return IDENT 301 | } 302 | 303 | // Predicates 304 | 305 | // IsLiteral returns true for tokens corresponding to identifiers 306 | // and basic type literals; it returns false otherwise. 307 | func (tok Token) IsLiteral() bool { return literal_beg < tok && tok < literal_end } 308 | 309 | // IsOperator returns true for tokens corresponding to operators and 310 | // delimiters; it returns false otherwise. 311 | func (tok Token) IsOperator() bool { 312 | return (operator_beg < tok && tok < operator_end) || tok == TILDE 313 | } 314 | 315 | // IsKeyword returns true for tokens corresponding to keywords; 316 | // it returns false otherwise. 317 | func (tok Token) IsKeyword() bool { return keyword_beg < tok && tok < keyword_end } 318 | 319 | // IsExported reports whether name starts with an upper-case letter. 320 | func IsExported(name string) bool { 321 | ch, _ := utf8.DecodeRuneInString(name) 322 | return unicode.IsUpper(ch) 323 | } 324 | 325 | // IsKeyword reports whether name is a Go keyword, such as "func" or "return". 326 | func IsKeyword(name string) bool { 327 | // TODO: opt: use a perfect hash function instead of a global map. 
328 | _, ok := keywords[name] 329 | return ok 330 | } 331 | 332 | // IsIdentifier reports whether name is a Go identifier, that is, a non-empty 333 | // string made up of letters, digits, and underscores, where the first CHARacter 334 | // is not a digit. Keywords are not identifiers. 335 | func IsIdentifier(name string) bool { 336 | if name == "" || IsKeyword(name) { 337 | return false 338 | } 339 | for i, c := range name { 340 | if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) { 341 | return false 342 | } 343 | } 344 | return true 345 | } 346 | 347 | func TokenLabel(token Token, ident string) string { 348 | if token.IsLiteral() { 349 | return ident 350 | } 351 | return token.String() 352 | } 353 | 354 | //type Token rune 355 | // 356 | //const ( 357 | // ILLEGAL Token = (1 << 7) - iota // error 358 | // EOF // 结束 359 | // COMMENT // 注释 360 | // NEWLINE // \n 换行符 361 | // IDENT // label 362 | // INT // 123456 363 | // FLOAT // 123.456 364 | // IMAG // 123.1i 复数 365 | // CHAR // '' 366 | // STRING // "", `` 367 | //) 368 | // 369 | //LET NameTable = [...]string{ 370 | // ILLEGAL: "ILLEGAL", 371 | // 372 | // EOF: "EOF", 373 | // COMMENT: "COMMENT", 374 | // NEWLINE: "NEWLINE", 375 | // 376 | // IDENT: "IDENT", 377 | // INT: "INT", 378 | // FLOAT: "FLOAT", 379 | // IMAG: "IMAG", 380 | // CHAR: "CHAR", 381 | // STRING: "STRING", 382 | //} 383 | // 384 | //func (token Token) String() (name string) { 385 | // if token >= 0 && token <= ILLEGAL { 386 | // name = NameTable[token] 387 | // } 388 | // if name == "" { 389 | // name = "token(" + strconv.Itoa(int(token)) + ")" 390 | // } 391 | // return name 392 | //} 393 | // 394 | //func (token Token) Label(ident string) string { 395 | // if token.IsLiteral() { 396 | // return ident 397 | // } 398 | // return token.String() 399 | //} 400 | // 401 | //func (token Token) IsLiteral() bool { return IDENT <= token && token <= STRING } 402 | 
-------------------------------------------------------------------------------- /compiler/compile/parser/parser.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "fmt" 5 | "github.com/facelang/face/compiler/compile/ast" 6 | "github.com/facelang/face/compiler/compile/token" 7 | ) 8 | 9 | type parser struct { 10 | *lexer // 符号读取器 11 | token token.Token // 符号 12 | literal string // 字面量 13 | exprLev int // 表达式层级 14 | inRhs bool // 是否右值表达式 15 | nestLev int // 递归嵌套计数器 16 | } 17 | 18 | func (p *parser) next() { 19 | for { 20 | p.token = p.NextToken() 21 | p.literal += p.identifier 22 | if p.token == token.COMMENT { 23 | continue 24 | } 25 | if p.token == token.NEWLINE { 26 | continue 27 | } 28 | break 29 | } 30 | } 31 | 32 | func (p *parser) got(token token.Token) bool { 33 | if p.token == token { 34 | p.next() 35 | return true 36 | } 37 | return false 38 | } 39 | 40 | func (p *parser) error(pos token.Pos, msg string) { 41 | //if p.errors.Len() > 10 { 42 | // panic(p.errors) 43 | //} 44 | //p.errors.Add(pos, msg) 45 | panic(fmt.Errorf("%s:%d:%d: %s", pos, p.literal, pos, msg)) 46 | } 47 | 48 | func (p *parser) errorf(format string, args ...interface{}) { 49 | //p.errors.Add(p.pos, fmt.Sprintf(format, args...)) 50 | } 51 | 52 | func (p *parser) expect(token token.Token) token.Pos { 53 | pos := p.pos 54 | if p.token != token { 55 | p.unexpect(token.String()) 56 | } 57 | 58 | p.next() 59 | return pos 60 | } 61 | 62 | func (p *parser) unexpect(except string) { 63 | found := token.TokenLabel(p.token, p.identifier) 64 | p.errorf("except %s, found %s", except, found) 65 | } 66 | 67 | // ---------------------------------------------------------------------------- 68 | // Identifiers 69 | 70 | // name = identifier . 
71 | func (p *parser) name() *ast.Name { 72 | if p.token != token.IDENT { 73 | p.unexpect("identifier") 74 | } 75 | 76 | n := new(ast.Name) 77 | n.Pos = p.pos 78 | n.Name = p.literal 79 | 80 | p.next() 81 | return n 82 | } 83 | 84 | // nameList = name { "," name } . 85 | func (p *parser) nameList(name *ast.Name) []*ast.Name { 86 | list := []*ast.Name{name} 87 | for p.token == token.COMMA { 88 | p.next() 89 | list = append(list, p.name()) 90 | } 91 | return list 92 | } 93 | 94 | // 参考 ES6 import {} from "" 语法 95 | // 暂不支持解包,只支持两种语法: 96 | // import name from "" 97 | // import "" 98 | func (p *parser) pkg() *ast.Package { 99 | d := &ast.Package{Pos: p.expect(token.IMPORT)} 100 | 101 | if p.token == token.IDENT { 102 | d.Name = p.literal 103 | p.expect(token.FROM) 104 | } 105 | 106 | d.Path = p.literal 107 | return d 108 | } 109 | 110 | // const name1, name2, ... type = val1, val2, ... 111 | // let name1, name2, ... type = val1, val2, ... 112 | func (p *parser) genDecl(require token.Token) ast.Decl { 113 | pos := p.expect(require) 114 | 115 | names := p.nameList(p.name()) 116 | var typ ast.Expr 117 | var values []ast.Expr 118 | if p.token != token.EOF && p.token != token.SEMICOLON && p.token != token.RPAREN { 119 | typ = p.tryIdentOrType() 120 | if p.token == token.ASSIGN { 121 | p.next() 122 | values = exprList(p, true) 123 | } 124 | } 125 | 126 | return &ast.GenDecl{ 127 | Pos: pos, 128 | Token: require, 129 | Names: names, 130 | Type: typ, 131 | Values: values, 132 | } 133 | } 134 | 135 | func (p *parser) funcDecl() ast.Decl { 136 | pos := p.expect(token.FUNC) 137 | name := p.name() 138 | 139 | // 参数列表,包括泛型参数 140 | _, params := p.parseParameters(true) 141 | 142 | results := p.parseResult() // (...) 
返回结果 143 | 144 | var body *ast.BlockStmt 145 | switch p.token { 146 | case token.LBRACE: // {} 147 | body = p.parseBody() 148 | case token.ASSIGN: 149 | // todo 单行表达式 150 | default: 151 | // 第二种情况: func func2(a, b int) [int] = a + b 152 | // 第三种情况: const func3 = (a, b) => a + b 153 | // const func4 = func() {} 154 | // const func5 = func4 别名 155 | panic("函数声明 func name(){} 或者 func name() = express") 156 | } 157 | 158 | return &ast.FuncDecl{ 159 | Pos: pos, 160 | Name: name, 161 | Type: &ast.FuncType{ 162 | Params: params, 163 | Results: results, 164 | }, 165 | Body: body, 166 | } 167 | } 168 | 169 | // SourceFile = { ImportDecl ";" } { TopLevelDecl ";" } . 170 | func (p *parser) parseFile() *ast.File { 171 | f := new(ast.File) 172 | 173 | prev := token.EOF 174 | for p.token != token.EOF { 175 | prev = p.token 176 | 177 | switch p.token { 178 | case token.IMPORT: 179 | if prev != token.IMPORT { 180 | p.error(p.pos, "import 语法只能出现在文件头部!") 181 | } 182 | f.Imports = append(f.Imports, p.pkg()) 183 | case token.CONST, token.LET: 184 | f.DeclList = append(f.DeclList, p.genDecl(p.token)) 185 | case token.FUNC: 186 | p.next() 187 | f.DeclList = append(f.DeclList, p.funcDecl()) 188 | default: 189 | p.error(p.pos, "顶层语法仅支持 const, let, type, func 关键字定义!") 190 | } 191 | } 192 | 193 | return f 194 | } 195 | 196 | func (p *parser) parseBody() *ast.BlockStmt { 197 | lbrace := p.expect(token.LBRACE) // { 198 | list := p.parseStmtList() 199 | rbrace := p.expect(token.RBRACE) // } 200 | 201 | return &ast.BlockStmt{Lbrace: lbrace, List: list, Rbrace: rbrace} 202 | } 203 | 204 | func (p *parser) parseBlockStmt() *ast.BlockStmt { 205 | return p.parseBody() 206 | } 207 | 208 | // gotAssign = "=" . 
209 | func (p *parser) gotAssign() bool { 210 | if p.token == token.ASSIGN { 211 | p.next() 212 | return true 213 | } 214 | return false 215 | } 216 | 217 | // block{}, case:, select case 会调用 218 | func (p *parser) parseStmtList() (list []ast.Stmt) { 219 | for p.token != token.CASE && p.token != token.DEFAULT && p.token != token.RBRACE && p.token != token.EOF { 220 | list = append(list, p.parseStmt()) 221 | } 222 | 223 | return 224 | } 225 | 226 | func (p *parser) parseStmt() (s ast.Stmt) { 227 | defer decNestLev(incNestLev(p)) 228 | 229 | switch p.token { 230 | case token.CONST, token.LET: 231 | s = &ast.DeclStmt{Decl: p.genDecl(p.token)} 232 | case 233 | token.IDENT, token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING, token.FUNC, token.LPAREN, // operands 234 | token.LBRACK, token.STRUCT, token.MAP, token.CHAN, token.INTERFACE, // composite types 235 | token.ADD, token.SUB, token.MUL, token.AND, token.XOR, token.ARROW, token.NOT: // unary operators 236 | s, _ = p.parseSimpleStmt(labelOk) 237 | // because of the required look-ahead, labeled statements are 238 | // parsed by parseSimpleStmt - don't expect a semicolon after 239 | // them 240 | if _, isLabeledStmt := s.(*ast.LabeledStmt); !isLabeledStmt { 241 | p.expectSemi() 242 | } 243 | case token.RETURN: 244 | s = p.parseReturnStmt() 245 | case token.BREAK, token.CONTINUE, token.GOTO, token.FALLTHROUGH: 246 | s = p.parseBranchStmt(p.token) 247 | // todo 存在块代码嵌套需要处理{ {} } 248 | case token.IF: 249 | s = p.parseIfStmt() 250 | case token.SWITCH: 251 | s = p.parseSwitchStmt() 252 | case token.FOR: 253 | s = p.parseForStmt() 254 | case token.SEMICOLON: 255 | // Is it ever possible to have an implicit semicolon 256 | // producing an empty statement in a valid program? 
257 | // (handle correctly anyway) 258 | s = &ast.EmptyStmt{Semicolon: p.pos, Implicit: p.lit == "\n"} 259 | p.next() 260 | case token.RBRACE: 261 | // a semicolon may be omitted before a closing "}" 262 | s = &ast.EmptyStmt{Semicolon: p.pos, Implicit: true} 263 | default: 264 | // no statement found 265 | pos := p.pos 266 | p.errorExpected(pos, "statement") 267 | p.advance(stmtStart) 268 | s = &ast.BadStmt{From: pos, To: p.pos} 269 | } 270 | 271 | return 272 | } 273 | 274 | // ---------------------------------------------------------------------------- 275 | // Statements 276 | 277 | // Parsing modes for parseSimpleStmt. 278 | const ( 279 | basic = iota 280 | labelOk 281 | rangeOk 282 | ) 283 | 284 | // parseSimpleStmt returns true as 2nd result if it parsed the assignment 285 | // of a range clause (with mode == rangeOk). The returned statement is an 286 | // assignment with a right-hand side that is a single unary expression of 287 | // the form "range x". No guarantees are given for the left-hand side. 
func (p *parser) parseSimpleStmt(mode int) (ast.Stmt, bool) {
	if p.trace {
		defer un(trace(p, "SimpleStmt"))
	}

	// NOTE(review): this function reads p.tok, while the inc/dec case below
	// and the other statement parsers in this file read p.token — confirm
	// which field the parser type actually exposes.
	x := p.parseList(false)

	switch p.tok {
	case
		token.DEFINE, token.ASSIGN, token.ADD_ASSIGN,
		token.SUB_ASSIGN, token.MUL_ASSIGN, token.QUO_ASSIGN,
		token.REM_ASSIGN, token.AND_ASSIGN, token.OR_ASSIGN,
		token.XOR_ASSIGN, token.SHL_ASSIGN, token.SHR_ASSIGN, token.AND_NOT_ASSIGN:
		// assignment statement, possibly part of a range clause
		pos, tok := p.pos, p.tok
		p.next()
		var y []ast.Expr
		isRange := false
		if mode == rangeOk && p.tok == token.RANGE && (tok == token.DEFINE || tok == token.ASSIGN) {
			// "lhs := range x" / "lhs = range x": wrap x in a unary RANGE expression.
			pos := p.pos
			p.next()
			y = []ast.Expr{&ast.UnaryExpr{OpPos: pos, Op: token.RANGE, X: p.parseRhs()}}
			isRange = true
		} else {
			y = p.parseList(true)
		}
		return &ast.AssignStmt{Lhs: x, TokPos: pos, Tok: tok, Rhs: y}, isRange
	}

	if len(x) > 1 {
		p.errorExpected(x[0].Pos(), "1 expression")
		// continue with first expression
	}

	switch p.tok {
	case token.COLON:
		// labeled statement
		colon := p.pos
		p.next()
		if label, isIdent := x[0].(*ast.Ident); mode == labelOk && isIdent {
			// Go spec: The scope of a label is the body of the function
			// in which it is declared and excludes the body of any nested
			// function.
			stmt := &ast.LabeledStmt{Label: label, Colon: colon, Stmt: p.parseStmt()}
			return stmt, false
		}
		// The label declaration typically starts at x[0].Pos(), but the label
		// declaration may be erroneous due to a token after that position (and
		// before the ':'). If SpuriousErrors is not set, the (only) error
		// reported for the line is the illegal label error instead of the token
		// before the ':' that caused the problem. Thus, use the (latest) colon
		// position for error reporting.
		p.error(colon, "illegal label declaration")
		return &ast.BadStmt{From: x[0].Pos(), To: colon + 1}, false

	case token.ARROW:
		// send statement
		arrow := p.pos
		p.next()
		y := p.parseRhs()
		return &ast.SendStmt{Chan: x[0], Arrow: arrow, Value: y}, false

	case token.INC, token.DEC:
		// increment or decrement
		s := &ast.IncDecStmt{X: x[0], TokPos: p.pos, Tok: p.token}
		p.next()
		return s, false
	}

	// expression
	return &ast.ExprStmt{X: x[0]}, false
}

// parseReturnStmt parses "return", an optional result list (absent when the
// next token is a semicolon or a closing brace) and the terminating semicolon.
func (p *parser) parseReturnStmt() *ast.ReturnStmt {
	pos := p.pos
	p.expect(token.RETURN)
	var x []ast.Expr
	if p.token != token.SEMICOLON && p.token != token.RBRACE {
		x = exprList(p, true)
	}
	p.expectSemi()

	return &ast.ReturnStmt{Return: pos, Results: x}
}

// parseBranchStmt parses the branch statement introduced by tok. An
// identifier following the keyword is taken as its label, except after
// fallthrough, which never takes one.
func (p *parser) parseBranchStmt(tok token.Token) *ast.BranchStmt {
	pos := p.expect(tok)
	var label *ast.Name
	if tok != token.FALLTHROUGH && p.token == token.IDENT {
		label = p.name()
	}
	p.expectSemi()

	return &ast.BranchStmt{TokPos: pos, Tok: tok, Label: label}
}

// makeExpr converts a statement that is expected to be an expression into
// that expression. A nil statement yields nil; an expression statement yields
// its expression; anything else is reported as an error (want names what was
// expected) and replaced by a BadExpr spanning the statement.
func (p *parser) makeExpr(s ast.Stmt, want string) ast.Expr {
	if s == nil {
		return nil
	}
	if es, isExpr := s.(*ast.ExprStmt); isExpr {
		return es.X
	}
	found := "simple statement"
	if _, isAss := s.(*ast.AssignStmt); isAss {
		found = "assignment"
	}
	p.error(s.Position(), fmt.Sprintf("expected %s, found %s (missing parentheses around composite literal?)", want, found))
	return &ast.BadExpr{From: s.Position(), To: p.safePos(s.End())}
}

// parseIfHeader parses the part of an if statement between "if" and the
// opening brace: an optional init statement and the condition. cond is never
// nil on return; a BadExpr stands in when the condition is missing.
func (p *parser) parseIfHeader() (init ast.Stmt, cond ast.Expr) {
	if p.token == token.LBRACE {
		p.error(p.pos, "missing condition in if statement")
		cond = &ast.BadExpr{From: p.pos, To: p.pos}
		return
	}
	// p.tok != token.LBRACE

	prevLev := p.exprLev // remember the expression nesting level; restored before returning
	p.exprLev = -1

	if p.token != token.SEMICOLON { // init statement
		// accept potential variable declaration but complain
		if p.token == token.LET {
			p.next()
			p.error(p.pos, "var declaration not allowed in if initializer")
		}
		init, _ = p.parseSimpleStmt(basic)
	}

	var condStmt ast.Stmt // condition statement
	var semi struct {
		pos token.Pos
		lit string // ";" or "\n"; valid if pos.IsValid()
	}
	if p.token != token.LBRACE { // something other than "{" follows the first statement
		if p.token == token.SEMICOLON { // ";" separates init from the condition
			semi.pos = p.pos
			semi.lit = p.identifier
			p.next()
		} else {
			p.expect(token.SEMICOLON)
		}
		if p.token != token.LBRACE { // condition statement; the input may also be "if ; {}"
			condStmt, _ = p.parseSimpleStmt(basic)
		}
	} else {
		// No semicolon: the statement parsed above is the condition, not an init.
		condStmt = init
		init = nil
	}

	if condStmt != nil {
		cond = p.makeExpr(condStmt, "boolean expression")
	} else if semi.pos.IsValid() {
		if semi.lit == "\n" {
			p.error(semi.pos, "unexpected newline, expecting { after if clause")
		} else {
			p.error(semi.pos, "missing condition in if statement")
		}
	}

	// make sure we have a valid AST
	if cond == nil {
		cond = &ast.BadExpr{From: p.pos, To: p.pos}
	}

	p.exprLev = prevLev
	return
}

// parseIfStmt parses a complete if statement: header, body and an optional
// else branch, which must be another if statement or a block.
func (p *parser) parseIfStmt() *ast.IfStmt {
	defer decNestLev(incNestLev(p))

	pos := p.expect(token.IF)

	init, cond := p.parseIfHeader()
	body := p.parseBody() // parseBlockStmt

	var else_ ast.Stmt
	if p.token == token.ELSE {
		p.next()
		switch p.token {
		case token.IF:
			else_ = p.parseIfStmt()
		case token.LBRACE:
			else_ = p.parseBlockStmt()
			p.expectSemi()
		default:
			p.errorExpected(p.pos, "if statement or block")
			else_ = &ast.BadStmt{From: p.pos, To: p.pos}
		}
	} else {
		p.expectSemi()
	}

	return &ast.IfStmt{If: pos, Init: init, Cond: cond, Body: body, Else: else_}
}

--------------------------------------------------------------------------------
/internal/os/elf/file.go:
--------------------------------------------------------------------------------
package elf

import (
	"encoding/binary"
	"fmt"
)

// Elf_Magic is the ELF identification block (magic number plus class, byte
// order and version bytes); it is shared by 32-bit and 64-bit files.
type Elf_Magic [EI_NIDENT]byte

// 32-bit ELF file header layout
//type Header32 struct {
//	Magic      [4]byte // ELF magic 0x7F, 0x45, 0x4C, 0x46 - ASCII \x7FELF
//	Class      byte    // file class (32/64-bit)
//	Data       byte    // byte order: 0x01 little endian (low byte first), 0x02 big endian (high byte first)
//	Version    byte    // ELF version, normally 0x01 (original ELF specification)
//	OSABI      byte    // OS ABI (0x00 System V, 0x01 HP-UX, 0x02 NetBSD, 0x03 Linux, 0x06 Solaris, 0x09 FreeBSD, 0x0C OpenBSD)
//	ABIVersion byte    // ABI version (ABI specific; normally 0x00 for System V)
//	bytes 10-16 (indices 9-15): padding; reserved, normally zero, kept for future use
//	Type       uint16  // file type
//	Machine    uint16  // machine type
//	Entry      uint32  // program entry point
//	Phoff      uint32  // program header table offset
//	Shoff      uint32  // section header table offset
//	Flags      uint32  // processor-specific flags
//	Ehsize     uint16  // ELF header size
//	Phentsize  uint16  // program header entry size
//	Phnum      uint16  // number of program header entries
//	Shentsize  uint16  // section header entry size
//	Shnum      uint16  // number of section header entries
//	Shstrndx   uint16  // index of the section-name string table
//}

// Bits returns the class byte (index 4) of the identification block as an
// int. It is the raw class code (1 for the 32-bit format, 2 for 64-bit,
// 0 invalid), not a bit count.
func (m Elf_Magic) Bits() int {
	return int(m[4])
}

// Endian returns the byte order selected by byte 5 of the identification
// block: 1 is little endian, 2 is big endian. Any other value panics.
func (m Elf_Magic) Endian() binary.ByteOrder {
	if m[5] == 1 {
		return binary.LittleEndian
	} else if m[5] == 2 {
		return binary.BigEndian
	}
	panic("不支持的字节序")
}

// Elf32_Phdr is one entry of the program header table.
type Elf32_Phdr struct {
	Type   Elf32_Word
	Offset Elf32_Off
	VAddr  Elf32_Addr
	Paddr  Elf32_Addr
	Filesz Elf32_Word
	Memsz  Elf32_Word
	Flags  Elf32_Word
	Align  Elf32_Word
}

// Elf32_Ehdr is the ELF32 file header.
type Elf32_Ehdr struct {
	Magic     Elf_Magic  // (16) magic number and identification bytes
	Type      Elf32_Half // (2) file type: 0 invalid, 1 relocatable, 2 executable, 3 shared object, 4 core dump
	Machine   Elf32_Half // (2) architecture
	Version   Elf32_Word // (4) 0 or 1
	Entry     Elf32_Addr // [32/64] entry point virtual address (address-width field: 4 bytes in ELF32, 8 in ELF64)
	Phoff     Elf32_Off  // [32/64] program header table offset (address-width field)
	Shoff     Elf32_Off  // [32/64] section header table offset (address-width field)
	Flags     Elf32_Word // (4) processor-specific flags
	Ehsize    Elf32_Half // (2) ELF header size
	Phentsize Elf32_Half // (2) program header entry size
	Phnum     Elf32_Half // (2) number of program header entries
	Shentsize Elf32_Half // (2) section header entry size
	Shnum     Elf32_Half // (2) number of section header entries
	Shstrndx  Elf32_Half // (2) index of the section header string table
}

// Elf32_Shdr is one entry of the section header table.
type Elf32_Shdr struct {
	Name      Elf32_Word // section name (4 bytes): offset into the string table; .shstrtab is itself a section, see Shstrndx
	Type      Elf32_Word // section type (1 program data .text/.data, 2 symbol table .symtab, 3 string table .shstrtab, 8 no file contents .bss, 9 relocation table .rel.text/.rel.data)
	Flags     Elf32_Word // section flags (0 default, 1 writable, 2 needs memory allocated when loaded, 4 executable)
	Addr      Elf32_Addr // virtual address; zero in relocatable files, computed by the linker for executables
	Offset    Elf32_Off  // offset of the section in the file
	Size      Elf32_Word // size in bytes; for SHT_NOBITS there is no file data and this is the memory size after loading
	Link      Elf32_Word // link information, generally used by symbol-table and relocation sections
	Info      Elf32_Word // additional information
	Addralign Elf32_Word // alignment requirement
	Entsize   Elf32_Word // entry size
}

// NewShdr returns a section header with the given type, flags, file offset
// and size. Addralign is set to 4; every other field, including the name
// offset, is zero.
func NewShdr(Type SectionType, Flags SectionFlag, Offset, Size int) *Elf32_Shdr {
	return &Elf32_Shdr{
		Name:      0,
		Type:      Elf32_Word(Type),
		Flags:     Elf32_Word(Flags),
		Addr:      0,
		Offset:    Elf32_Off(Offset),
		Size:      Elf32_Word(Size),
		Link:      0,
		Info:      0,
		Addralign: 4,
		Entsize:   0,
	}
}

// Elf32_Sym is one ELF32 symbol table entry.
type Elf32_Sym struct {
	Name  uint32 // symbol name
	Value uint32 // symbol value
	Size  uint32 // symbol size
	Info  byte   // symbol type and binding
	Other byte   // reserved
	Shndx uint16 // section the symbol belongs to
}

// Elf32_Rel is one ELF32 relocation table entry.
type Elf32_Rel struct {
	Offset uint32
	Info   uint32
}

// Elf32_RelInfo pairs a relocation entry with the names it refers to.
type Elf32_RelInfo struct {
	SegName string     // name of the section being relocated
	Rel     *Elf32_Rel // relocation entry
	RelName string     // symbol name
}

// File holds the important contents of an ELF file and the operations on them.
type File struct {
	Ehdr         *Elf32_Ehdr            // ELF file header
	PhdrTab      []*Elf32_Phdr          // program header table
	ShdrTab      map[string]*Elf32_Shdr // section headers by name
	ShdrNames    []string               // section names in index order, so a symbol can look up its section
	SymTab       map[string]*Elf32_Sym  // symbols by name
	SymNames     []string               // symbol names in index order; needed when generating relocation tables
	RelTab       []*Elf32_RelInfo       // relocation entries (the original C++ `char *elf_dir` working-directory field is omitted)
	Name         string                 // file name
	Reader       BytesReader            // cache/reader for the raw file bytes
	Shstrtab     []byte                 // section header string table data
	ShstrtabSize int                    // section header string table length
	Strtab       []byte                 // string table data
	StrtabSize   int                    // string table length
	ProgSegList  []*ProgSeg             // cached program segments
}

// NewElfFile creates a File with a 32-bit header built from the given
// identification block, file type and machine type, with all tables
// initialised, and seeds it with the empty section header and the empty
// symbol.
func NewElfFile(magic Elf_Magic, eType, eMachine Elf32_Half) *File {
	file := &File{
		Ehdr: &Elf32_Ehdr{
			Magic:     magic,                   // the identification block; see the notes below
			Type:      eType,                   // file type: 1 relocatable, 2 executable, 3 shared object, 4 core dump, 0 invalid
			Machine:   eMachine,                // machine type
			Version:   Elf32_Word(EV_CURRENT),  // file version, normally 1
			Entry:     0,                       // linear address of the entry point; used by executables, 0 in relocatable files
			Phoff:     0,                       // file offset of the program header table
			Flags:     0,                       // platform-specific flags, normally 0 (apparently unused on x86)
			Ehsize:    52,                      // header size (52 bytes for 32-bit, 64 bytes for 64-bit)
			Phentsize: 0,                       // program header entry size
			Phnum:     0,                       // number of program header entries; the table occupies [phoff : phoff+phentsize*phnum]
			Shentsize: 40,                      // section header entry size
			Shnum:     0,                       // number of section header entries; the table occupies [shoff : shoff+shentsize*shnum]
			Shstrndx:  0,                       // index of .shstrtab
		},
		ShdrTab:     make(map[string]*Elf32_Shdr),
		ShdrNames:   make([]string, 0),
		SymTab:      make(map[string]*Elf32_Sym),
		SymNames:    make([]string, 0),
		RelTab:      make([]*Elf32_RelInfo, 0),
		Shstrtab:    make([]byte, 0),
		Strtab:      make([]byte, 0),
		ProgSegList: make([]*ProgSeg, 0),
	}

	// Initialising the ELF identification block
	//Ehdr.Magic[0] = 0x7F // DEL
	//Ehdr.Magic[1] = 'E' // .
	//Ehdr.Magic[2] = 'L' // .
	//Ehdr.Magic[3] = 'F' // .
	//Ehdr.Magic[4] = 1 // class: 32-bit format, 2 for 64-bit, 0 invalid
	//Ehdr.Magic[5] = 1 // little endian, 2 for big endian, 0 invalid
	//Ehdr.Magic[6] = 1 // ELF version, 1 by default
	// The following 9 bytes are not defined by the ELF standard and are
	// generally used for platform-specific extension flags.
	// Byte 8 is 0 for a unix system.
	// Byte 9 is 0 for system ABI version 0.
	// The remaining bytes default to 0.

	// Add the empty section header (present in relocatable and executable files alike).
	file.AddShdr("", &Elf32_Shdr{})

	// Add the empty symbol table entry.
	file.AddSym("", nil)

	return file
}

// Bits returns the class code stored in the header's identification block.
func (e *File) Bits() int {
	return e.Ehdr.Magic.Bits()
}

// Endian returns the byte order stored in the header's identification block.
func (e *File) Endian() binary.ByteOrder { return e.Ehdr.Magic.Endian() }

// AddShdr appends shName to the ordered section-name list and, when shdr is
// non-nil, registers shdr under that name.
func (e *File) AddShdr(shName string, shdr *Elf32_Shdr) {
	if shdr != nil {
		e.ShdrTab[shName] = shdr
	}
	e.ShdrNames = append(e.ShdrNames, shName)
}

// AddShdrSec adds the section header for a .text, .data or .bss section at
// the given file offset; sections with any other name are ignored.
// TODO: sh_name and sh_offset still need to be recomputed.
func (e *File) AddShdrSec(section *Section, offset int) {
	if section.Name == ".text" {
		e.AddShdr(section.Name,
			NewShdr(SHT_PROGBITS, SHF_ALLOC|SHF_EXECINSTR, offset, section.Length),
		)
	} else if section.Name == ".data" {
		e.AddShdr(section.Name,
			NewShdr(SHT_PROGBITS, SHF_ALLOC|SHF_WRITE, offset, section.Length),
		)
	} else if section.Name == ".bss" { // optional
		// About .bss: it stores uninitialised global and static variables.
		// Property: it is zero-initialised automatically when the program is loaded.
		// Benefit: saves space in the executable (normally only its size is recorded).
		// Uses: large arrays or buffers, uninitialised globals, uninitialised
		// static locals, data structures that must start zeroed.
		// Syntax: buffer: resw 1024 // the symbol buffer needs resw-width * 1024 bytes (resw is equivalent to dw)
		e.AddShdr(section.Name,
			NewShdr(SHT_NOBITS, SHF_ALLOC|SHF_WRITE, offset, section.Length),
		)
	}
}

// AddPhdr appends a program header entry built from the arguments. Paddr is
// not set here and stays zero.
func (e *File) AddPhdr(t Elf32_Word, off Elf32_Off, vaddr Elf32_Addr, filesz, memsz, flags, align Elf32_Word) {
	ph := &Elf32_Phdr{
		Type:   t,
		Offset: off,
		VAddr:  vaddr,
		Filesz: filesz,
		Memsz:  memsz,
		Flags:  flags,
		Align:  align,
	}
	e.PhdrTab = append(e.PhdrTab, ph)
}

// AddProgSeg 添加程序头表, 同时添加段表 244 | func (e *File) AddProgSeg(name string, seg *ProgSeg) { 245 | flags := PF_W | PF_R // 可读、可写 246 | filesz := seg.Size // 占用磁盘大小(合并后的大小) 247 | if name == ".text" { 248 | flags = PF_X | PF_R //.text段可读可执行 249 | } 250 | if name == ".bss" { 251 | filesz = 0 // .bss段不占磁盘空间 252 | } 253 | 254 | seg.Name = name 255 | e.ProgSegList = append(e.ProgSegList, seg) 256 | e.AddPhdr(Elf32_Word(PT_LOAD), seg.Offset, seg.BaseAddr, 257 | filesz, seg.Size, Elf32_Word(flags), MemAlign) 258 | 259 | shType := SHT_PROGBITS 260 | shFlags := SHF_ALLOC | SHF_WRITE 261 | shAlign := 4 //4B 262 | if name == ".bss" { 263 | shType = SHT_NOBITS 264 | } 265 | if name == ".text" { 266 | shFlags = SHF_ALLOC | SHF_EXECINSTR 267 | shAlign = 16 268 | } 269 | // 添加程序头表也要添加对应的段 270 | //添加一个段表项,暂时按照4字节对齐 271 | shdr := NewShdr(shType, shFlags, int(seg.Offset), int(seg.Size)) 272 | shdr.Addr = seg.BaseAddr 273 | shdr.Addralign = Elf32_Word(shAlign) 274 | e.AddShdr(name, shdr) 275 | } 276 | 277 | func (e *File) AddSym(name string, sym *Elf32_Sym) { 278 | target := &Elf32_Sym{ 279 | Name: 0, 280 | Value: 0, 281 | Size: 0, 282 | Info: 0, 283 | Other: 0, 284 | Shndx: 0, 285 | } 286 | if name != "" { 287 | target.Value = sym.Value 288 | target.Size = sym.Size 289 | target.Info = sym.Info 290 | target.Other = sym.Other 291 | target.Shndx = sym.Shndx 292 | } 293 | e.SymTab[name] = target 294 | e.SymNames = append(e.SymNames, name) 295 | } 296 | 297 | func (e *File) AddRel(info *Elf32_RelInfo) { 298 | e.RelTab = append(e.RelTab, info) 299 | } 300 | 301 | func (e *File) GetSegIndex(seg string) int { 302 | for i, name := range e.ShdrNames { 303 | if name == seg { 304 | return i 305 | } 306 | } 307 | return -1 308 | } 309 | 310 | func (e *File) GetSymIndex(sym string) int { 311 | for i, name := range e.SymNames { 312 | if name == sym { 313 | return i 314 | } 315 | } 316 | return -1 317 | } 318 | 319 | func (e *File) ReadData(offset Elf32_Off, size Elf32_Word) []byte { 320 | return 
e.Reader.Data(int(offset), int(size)) 321 | } 322 | 323 | func (e *File) ReadDataBy(seg string) []byte { 324 | section := e.ShdrTab[seg] 325 | return e.Reader.Data(int(section.Offset), int(section.Size)) 326 | } 327 | 328 | func (e *File) WriteFile(target string) error { 329 | return FileWrite(e, target) 330 | } 331 | 332 | /* 333 | dir:输出目录 334 | flag:1-第一次写,文件头+PHT;2-第二次写,段表字符串表+段表+符号表+字符串表; 335 | */ 336 | //void Elf_file::writeElf(const char*dir,int flag) 337 | //{ 338 | //if(flag==1) 339 | //{ 340 | //FILE*fp=fopen(dir,"w+"); 341 | //fwrite(&ehdr,ehdr.e_ehsize,1,fp);//elf文件头 342 | //if(!phdrTab.empty())//程序头表 343 | //{ 344 | //for(int i=0;i 0 { 429 | // sh := e.ShdrNames[sym.Shndx] 430 | // segment = fmt.Sprintf("%s,%s", segment, sh) 431 | // } 432 | // w.Append([]string{ 433 | // fmt.Sprintf("0x%x", offset+i*16), 434 | // fmt.Sprintf("[%d]", i), 435 | // name, 436 | // fmt.Sprintf("0x%x", sym.Value), 437 | // fmt.Sprintf("%d bytes", sym.Size), 438 | // fmt.Sprintf("0b%b", sym.Info), 439 | // fmt.Sprintf("0b%b", sym.Other), 440 | // segment, 441 | // }) 442 | //} 443 | //w.Render() 444 | 445 | // 打印重定位表【段】 446 | 447 | // todo 循环遍历,依次打印段信息 448 | 449 | } 450 | 451 | //// GetData GetSectionData 获取节数据 452 | //func (f *ElfFile) GetData(seg *Elf32_Shdr) ([]byte, error) { 453 | // offset := uint64(seg.Offset) 454 | // size := uint64(seg.Size) 455 | // os.Open() // 读取数据 456 | // 457 | // data := make([]byte, size) 458 | // if _, err := f.FileHandle.Seek(int64(offset), 0); err != nil { 459 | // fmt.Printf("[DEBUG] 错误: 定位到节偏移失败: %v\n", err) 460 | // return nil, err 461 | // } 462 | // if _, err := io.ReadFull(f.FileHandle, data); err != nil { 463 | // fmt.Printf("[DEBUG] 错误: 读取节数据失败: %v\n", err) 464 | // return nil, err 465 | // } 466 | // fmt.Printf("[DEBUG] 成功读取节数据, 大小: %d\n", len(data)) 467 | // return data, nil 468 | //} 469 | -------------------------------------------------------------------------------- /compiler/compile/parser/types.go: 
--------------------------------------------------------------------------------
package parser

import (
	"github.com/facelang/face/compiler/compile/ast"
	"github.com/facelang/face/compiler/compile/tokens"
	"github.com/facelang/face/internal/prog"
	"go/token"
)

// NewIndirect builds the expression for a pointer type: an Operation with
// operator Mul applied to typ, positioned at pos.
// TODO: pointer types are ignored for now.
// NOTE(review): o.pos is an unexported field of a type from package prog and
// Mul is not qualified — confirm both resolve from this package.
func NewIndirect(pos prog.FilePos, typ prog.Expr) prog.Expr {
	o := new(prog.Operation)
	o.pos = pos
	o.Op = Mul
	o.X = typ
	return o
}

// FuncType parses a function signature: a parenthesised parameter list
// followed by the result. The first result is the type parameter list, which
// is currently always nil because type-parameter parsing is commented out.
// If context != "", type parameters are not permitted.
func FuncType(p *parser, context string) ([]*prog.Field, *prog.FuncType) {

	typ := new(prog.FuncType)
	typ.pos = p.FilePos

	var tparamList []*prog.Field
	// The target syntax uses angle brackets for type parameters.
	//if p.got(api.LBRACK) { // [] generics: func [] name(args)
	//	if context != "" {
	//		// accept but complain
	//		p.syntaxErrorAt(typ.pos, context+" must have no type parameters")
	//	}
	//	if p.tok == _Rbrack {
	//		p.syntaxError("empty type parameter list")
	//		p.next()
	//	} else {
	//		tparamList = p.paramList(nil, nil, _Rbrack, true)
	//	}
	//}

	p.want(LPAREN)
	typ.ParamList = p.paramList(nil, nil, _Rparen, false)
	typ.ResultList = p.funcResult()

	return tparamList, typ
}

// TypeOrNil is like type_ but it returns nil if there was no type
// instead of reporting an error.
//
//	Type     = TypeName | TypeLit | "(" Type ")" .
//	TypeName = identifier | QualifiedIdent .
//	TypeLit  = ArrayType | StructType | PointerType | FunctionType | InterfaceType |
//		      SliceType | MapType | Channel_Type .
func TypeOrNil(p *parser) prog.Expr {
	//defer decNestLev(incNestLev(p)) // recursion counter, guards against overly deep nesting
	// NOTE(review): this function looks like an unfinished twin of
	// tryIdentOrType. It reads p.tok once where everything else reads
	// p.token, uses unqualified token names (LBRACK, STRUCT, ...) and calls
	// parseChanType, whose counterpart case is commented out in
	// tryIdentOrType — confirm all of these resolve.
	switch p.token {
	case token.IDENT:
		typ := p.parseTypeName(nil)
		if p.tok == token.LBRACK {
			typ = p.parseTypeInstance(typ)
		}
		return typ
	case LBRACK:
		lbrack := p.expect(LBRACK)
		return p.parseArrayType(lbrack, nil)
	case STRUCT:
		return p.parseStructType()
	case MUL:
		return p.parsePointerType()
	case FUNC:
		return p.parseFuncType()
	case INTERFACE:
		return p.parseInterfaceType()
	case MAP:
		return p.parseMapType()
	case CHAN, ARROW:
		return p.parseChanType()
	case LPAREN:
		lparen := p.pos
		p.next()
		typ := p.parseType()
		rparen := p.expect(RPAREN)
		return &ast.ParenExpr{Lparen: lparen, X: typ, Rparen: rparen}
	}

	// no type found
	return nil
}

// RequireType is like TypeOrNil but reports an error through p.unexpect when
// no type is found. The (possibly nil) result of TypeOrNil is returned either
// way.
func RequireType(p *parser) prog.Expr {
	typ := TypeOrNil(p)
	if typ == nil {
		p.unexpect("type")
	}
	return typ
}

/**
function type: let v1 func(string)
function type: let v2 (string) => string
array type: let v3 array
dictionary type: let v4 map
basic data type: let v5 int [string, float]
other user-defined type: let v6 http.Http [or another type alias]
*/

// parseTypeInstance parses a bracketed, comma-separated type argument list
// following typ and returns the resulting index expression. An empty list is
// reported as an error and produces an IndexExpr whose index is a BadExpr.
func (p *parser) parseTypeInstance(typ ast.Expr) ast.Expr {
	opening := p.expect(token.LBRACK) // [
	//p.exprLev++
	var list []ast.Expr
	for p.token != token.RBRACK && p.token != token.EOF {
		list = append(list, p.parseType())
		if p.token != token.COMMA {
			break
		}
		p.next()
	}
	//p.exprLev--

	closing := p.expect(token.RBRACK) // ]

	if len(list) == 0 {
		p.unexpect("type argument list")
		return &ast.IndexExpr{
			X:      typ,
			Lbrack: opening,
			Index:  &ast.BadExpr{From: opening + 1, To: closing},
			Rbrack: closing,
		}
	}

	return packIndexExpr(typ, opening, list, closing)
}

// If
the result is an identifier, it is not resolved. 136 | func (p *parser) parseTypeName(ident *ast.Name) ast.Expr { 137 | if ident == nil { 138 | ident = p.name() 139 | } 140 | 141 | if p.token == token.PERIOD { 142 | p.next() 143 | sel := p.name() 144 | return &ast.SelectorExpr{X: ident, Sel: sel} 145 | } 146 | 147 | return ident 148 | } 149 | 150 | // "[" has already been consumed, and lbrack is its position. 151 | // If len != nil it is the already consumed array length. 152 | func (p *parser) parseArrayType(lbrack token.Pos, len ast.Expr) *ast.ArrayType { 153 | 154 | if len == nil { // 没有解析 [x] 中间的参数 155 | //p.exprLev++ 156 | // always permit ellipsis for more fault-tolerant parsing 157 | if p.token == token.ELLIPSIS { // [...] 158 | len = &ast.Ellipsis{Ellipsis: p.pos} 159 | p.next() 160 | } else if p.token != token.RBRACK { // [len] 161 | len = exprRhs(p) 162 | } 163 | // len 可能为 nil 164 | //p.exprLev-- 165 | } 166 | if p.token == token.COMMA { // , 不应该出现 167 | // Trailing commas are accepted in type parameter 168 | // lists but not in array type declarations. 169 | // Accept for better error handling but complain. 
170 | p.error(p.pos, "unexpected comma; expecting ]") 171 | p.next() 172 | } 173 | p.expect(token.RBRACK) // ] 结束符 174 | elt := p.parseType() // 可能是多维数组 175 | return &ast.ArrayType{Lbrack: lbrack, Len: len, Elt: elt} 176 | } 177 | 178 | func (p *parser) parseMapType() *ast.MapType { 179 | pos := p.expect(token.MAP) // map 180 | p.expect(token.LBRACK) // [ 181 | key := p.parseType() // keyType 182 | p.expect(token.RBRACK) // ] 183 | value := p.parseType() // valType 184 | 185 | return &ast.MapType{Map: pos, Key: key, Value: value} 186 | } 187 | 188 | func (p *parser) parseQualifiedIdent(ident *ast.Name) ast.Expr { 189 | 190 | typ := p.parseTypeName(ident) 191 | if p.token == token.LBRACK { 192 | typ = p.parseTypeInstance(typ) 193 | } 194 | 195 | return typ 196 | } 197 | 198 | func (p *parser) parseArrayFieldOrTypeInstance(x *ast.Name) (*ast.Name, ast.Expr) { 199 | lbrack := p.expect(token.LBRACK) 200 | trailingComma := token.NoPos // if valid, the position of a trailing comma preceding the ']' 201 | var args []ast.Expr 202 | if p.token != token.RBRACK { 203 | //p.exprLev++ 204 | args = append(args, exprRhs(p)) 205 | for p.token == token.COMMA { 206 | comma := p.pos 207 | p.next() 208 | if p.token == token.RBRACK { 209 | trailingComma = comma 210 | break 211 | } 212 | args = append(args, exprRhs(p)) 213 | } 214 | //p.exprLev-- 215 | } 216 | rbrack := p.expect(token.RBRACK) 217 | 218 | if len(args) == 0 { 219 | // x []E 220 | elt := p.parseType() 221 | return x, &ast.ArrayType{Lbrack: lbrack, Elt: elt} 222 | } 223 | 224 | // x [P]E or x[P] 225 | if len(args) == 1 { 226 | elt := p.tryIdentOrType() 227 | if elt != nil { 228 | // x [P]E 229 | if trailingComma.IsValid() { 230 | // Trailing commas are invalid in array type fields. 231 | p.error(trailingComma, "unexpected comma; expecting ]") 232 | } 233 | return x, &ast.ArrayType{Lbrack: lbrack, Len: args[0], Elt: elt} 234 | } 235 | } 236 | 237 | // x[P], x[P1, P2], ... 
238 | return nil, packIndexExpr(x, lbrack, args, rbrack) 239 | } 240 | 241 | // 只在结构体中 242 | func (p *parser) parseFieldDecl() *ast.Field { 243 | 244 | //doc := p.leadComment 245 | 246 | var names []*ast.Name 247 | var typ ast.Expr 248 | switch p.token { 249 | case token.IDENT: // 先解析字段名 250 | name := p.name() 251 | if p.token == token.PERIOD || p.token == token.STRING || p.token == token.SEMICOLON || p.token == token.RBRACE { 252 | // embedded type 253 | // 继续解析 name. . "" ; } 254 | typ = name 255 | if p.token == token.PERIOD { 256 | typ = p.parseQualifiedIdent(name) 257 | } 258 | } else { // 其它符号 259 | // name1, name2, ... T 260 | names = []*ast.Name{name} 261 | for p.token == token.COMMA { // struct { a, b, c int } 262 | p.next() 263 | names = append(names, p.name()) 264 | } 265 | // Careful dance: We don't know if we have an embedded instantiated 266 | // type T[P1, P2, ...] or a field T of array type []E or [P]E. 267 | // { a } 268 | if len(names) == 1 && p.token == token.LBRACK { 269 | name, typ = p.parseArrayFieldOrTypeInstance(name) // todo 270 | if name == nil { 271 | names = nil 272 | } 273 | } else { 274 | // T P 275 | typ = p.parseType() 276 | } 277 | } 278 | case token.MUL: 279 | star := p.pos 280 | p.next() 281 | if p.token == token.LPAREN { 282 | // *(T) 283 | p.error(p.pos, "cannot parenthesize embedded type") 284 | p.next() 285 | typ = p.parseQualifiedIdent(nil) 286 | // expect closing ')' but no need to complain if missing 287 | if p.token == token.RPAREN { 288 | p.next() 289 | } 290 | } else { 291 | // *T 292 | typ = p.parseQualifiedIdent(nil) 293 | } 294 | typ = &ast.StarExpr{Star: star, X: typ} 295 | 296 | case token.LPAREN: 297 | p.error(p.pos, "cannot parenthesize embedded type") 298 | p.next() 299 | if p.token == token.MUL { 300 | // (*T) 301 | star := p.pos 302 | p.next() 303 | typ = &ast.StarExpr{Star: star, X: p.parseQualifiedIdent(nil)} 304 | } else { 305 | // (T) 306 | typ = p.parseQualifiedIdent(nil) 307 | } 308 | // expect closing ')' 
but no need to complain if missing 309 | if p.token == token.RPAREN { 310 | p.next() 311 | } 312 | 313 | default: 314 | pos := p.pos 315 | p.unexpect("field name or embedded type") 316 | typ = &ast.BadExpr{From: pos, To: p.pos} 317 | } 318 | 319 | var tag *ast.BasicLit 320 | if p.token == token.STRING { 321 | tag = &ast.BasicLit{Pos: p.pos, Kind: p.token, Value: p.identifier} 322 | p.next() 323 | } 324 | 325 | field := &ast.Field{Names: names, Type: typ, Tag: tag} 326 | return field 327 | } 328 | 329 | func (p *parser) parseStructType() *ast.StructType { 330 | pos := p.expect(token.STRUCT) // struct {} 331 | lbrace := p.expect(token.LBRACE) 332 | var list []*ast.Field 333 | for p.token == token.IDENT || p.token == token.MUL || p.token == token.LPAREN { 334 | // a field declaration cannot start with a '(' but we accept 335 | // it here for more robust parsing and better error messages 336 | // (parseFieldDecl will check and complain if necessary) 337 | list = append(list, p.parseFieldDecl()) 338 | } 339 | rbrace := p.expect(token.RBRACE) 340 | 341 | return &ast.StructType{ 342 | Struct: pos, 343 | Fields: &ast.FieldList{ 344 | Opening: lbrace, 345 | List: list, 346 | Closing: rbrace, 347 | }, 348 | } 349 | } 350 | 351 | func (p *parser) parsePointerType() *ast.StarExpr { 352 | star := p.expect(token.MUL) 353 | base := p.parseType() 354 | 355 | return &ast.StarExpr{Star: star, X: base} 356 | } 357 | 358 | func (p *parser) parseMethodSpec() *ast.Field { 359 | var idents []*ast.Name 360 | var typ ast.Expr 361 | x := p.parseTypeName(nil) 362 | if ident, _ := x.(*ast.Name); ident != nil { 363 | switch { 364 | case p.token == token.LBRACK: 365 | // generic method or embedded instantiated type 366 | lbrack := p.pos 367 | p.next() 368 | //p.exprLev++ 369 | x := expr(p) 370 | //p.exprLev-- 371 | if name0, _ := x.(*ast.Name); name0 != nil && p.token != token.COMMA && p.token != token.RBRACK { 372 | // generic method m[T any] 373 | // 374 | // Interface methods do not have 
type parameters. We parse them for a 375 | // better error message and improved error recovery. 376 | _ = p.parseParameterList(name0, nil, token.RBRACK) 377 | _ = p.expect(token.RBRACK) 378 | p.error(lbrack, "interface method must have no type parameters") 379 | 380 | // TODO(rfindley) refactor to share code with parseFuncType. 381 | _, params := p.parseParameters(false) 382 | results := p.parseResult() 383 | idents = []*ast.Name{ident} 384 | typ = &ast.FuncType{ 385 | Func: token.NoPos, 386 | Params: params, 387 | Results: results, 388 | } 389 | } else { 390 | // embedded instantiated type 391 | // TODO(rfindley) should resolve all identifiers in x. 392 | list := []ast.Expr{x} 393 | if p.token == token.COMMA { 394 | //p.exprLev++ 395 | p.next() 396 | for p.token != token.RBRACK && p.token != token.EOF { 397 | list = append(list, p.parseType()) 398 | if p.token != token.COMMA { 399 | break 400 | } 401 | p.next() 402 | } 403 | //p.exprLev-- 404 | } 405 | rbrack := p.expectClosing(token.RBRACK, "type argument list") 406 | typ = packIndexExpr(ident, lbrack, list, rbrack) 407 | } 408 | case p.token == token.LPAREN: 409 | // ordinary method 410 | // TODO(rfindley) refactor to share code with parseFuncType. 
411 | _, params := p.parseParameters(false) 412 | results := p.parseResult() 413 | idents = []*ast.Ident{ident} 414 | typ = &ast.FuncType{Func: token.NoPos, Params: params, Results: results} 415 | default: 416 | // embedded type 417 | typ = x 418 | } 419 | } else { 420 | // embedded, possibly instantiated type 421 | typ = x 422 | if p.token == token.LBRACK { 423 | // embedded instantiated interface 424 | typ = p.parseTypeInstance(typ) 425 | } 426 | } 427 | 428 | return &ast.Field{Names: idents, Type: typ} 429 | } 430 | 431 | func (p *parser) embeddedElem(x ast.Expr) ast.Expr { 432 | if x == nil { 433 | x = p.embeddedTerm() 434 | } 435 | for p.token == token.OR { 436 | t := new(ast.BinaryExpr) 437 | t.OpPos = p.pos 438 | t.Op = token.OR 439 | p.next() 440 | t.X = x 441 | t.Y = p.embeddedTerm() 442 | x = t 443 | } 444 | return x 445 | } 446 | 447 | func (p *parser) embeddedTerm() ast.Expr { 448 | if p.token == token.TILDE { 449 | t := new(ast.UnaryExpr) 450 | t.OpPos = p.pos 451 | t.Op = token.TILDE 452 | p.next() 453 | t.X = p.parseType() 454 | return t 455 | } 456 | 457 | t := p.tryIdentOrType() 458 | if t == nil { 459 | pos := p.pos 460 | p.unexpect("~ term or type") 461 | return &ast.BadExpr{From: pos, To: p.pos} 462 | } 463 | 464 | return t 465 | } 466 | 467 | func (p *parser) parseInterfaceType() *ast.InterfaceType { 468 | pos := p.expect(token.INTERFACE) // interface {} 469 | lbrace := p.expect(token.LBRACE) 470 | 471 | var list []*ast.Field 472 | 473 | parseElements: 474 | for { 475 | switch { 476 | case p.token == token.IDENT: // 只能声明函数 477 | f := p.parseMethodSpec() 478 | if f.Names == nil { 479 | f.Type = p.embeddedElem(f.Type) 480 | } 481 | f.Comment = p.expectSemi() 482 | list = append(list, f) 483 | case p.token == token.TILDE: 484 | typ := p.embeddedElem(nil) 485 | comment := p.expectSemi() 486 | list = append(list, &ast.Field{Type: typ, Comment: comment}) 487 | default: 488 | if t := p.tryIdentOrType(); t != nil { 489 | typ := p.embeddedElem(t) 490 | 
comment := p.expectSemi() 491 | list = append(list, &ast.Field{Type: typ, Comment: comment}) 492 | } else { 493 | break parseElements 494 | } 495 | } 496 | } 497 | 498 | // TODO(rfindley): the error produced here could be improved, since we could 499 | // accept an identifier, 'type', or a '}' at this point. 500 | rbrace := p.expect(token.RBRACE) 501 | 502 | return &ast.InterfaceType{ 503 | Interface: pos, 504 | Methods: &ast.FieldList{ 505 | Opening: lbrace, 506 | List: list, 507 | Closing: rbrace, 508 | }, 509 | } 510 | } 511 | 512 | func (p *parser) tryIdentOrType() ast.Expr { 513 | defer decNestLev(incNestLev(p)) 514 | 515 | switch p.token { 516 | case token.IDENT: 517 | typ := p.parseTypeName(nil) // 可能是 x.name(包名) 或者 x 518 | if p.token == token.LBRACK { // x[] 519 | typ = p.parseTypeInstance(typ) // todo 520 | } 521 | return typ 522 | case token.LBRACK: 523 | lbrack := p.expect(token.LBRACK) // n[] 524 | return p.parseArrayType(lbrack, nil) 525 | case token.STRUCT: 526 | return p.parseStructType() 527 | case token.MUL: 528 | return p.parsePointerType() 529 | case token.FUNC: 530 | return p.parseFuncType() 531 | case token.INTERFACE: 532 | return p.parseInterfaceType() 533 | case token.MAP: 534 | return p.parseMapType() 535 | //case tokens.CHAN, tokens.ARROW: 536 | // return p.parseChanType() 537 | case token.LPAREN: // ( 538 | lparen := p.pos 539 | p.next() 540 | typ := p.parseType() 541 | rparen := p.expect(token.RPAREN) 542 | return &ast.ParenExpr{Lparen: lparen, X: typ, Rparen: rparen} 543 | } 544 | 545 | // no type found 546 | return nil 547 | } 548 | 549 | func (p *parser) parseType() ast.Expr { 550 | typ := p.tryIdentOrType() 551 | 552 | if typ == nil { 553 | pos := p.pos 554 | p.unexpect("type") 555 | return &ast.BadExpr{From: pos, To: p.pos} 556 | } 557 | 558 | return typ 559 | } 560 | -------------------------------------------------------------------------------- /compiler/assemble/internal/parser.go: 
-------------------------------------------------------------------------------- 1 | package internal 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "github.com/facelang/face/compiler/compile/token" 7 | "github.com/facelang/face/internal/os/elf" 8 | "github.com/facelang/face/internal/utils" 9 | "go/ast" 10 | "io" 11 | "os" 12 | "sort" 13 | "strconv" 14 | "strings" 15 | "text/scanner" 16 | ) 17 | 18 | // 重定位类型常量 19 | const ( 20 | R_386_32 = 1 // 绝对寻址 21 | R_386_PC32 = 2 // 相对寻址 22 | ) 23 | 24 | type section struct { 25 | Name string 26 | Offset, Length int 27 | } 28 | 29 | type relocate struct { 30 | Label string // 重定位符号的名称 31 | Type int // 重定位类型0-R_386_32;1-R_386_PC32 32 | Offset int // 重定位位置的偏移 33 | Section string // 重定位目标段 34 | } 35 | 36 | type parser struct { 37 | *lexer // 词法解析器 38 | token Token // 符号类型 39 | error error // 错误信息 40 | declList []*ast.GenDecl // 语句列表 41 | sec *section // 当前段 42 | secList []*section // 所有段列表 43 | instrList []*instr // 指令列表 44 | labelList []*label // 符号表 45 | labelNames map[string]int // 符号表,名称映射 46 | relocateList []*relocate // 重定位表 47 | 48 | //lineNum int // Line number in source file. 49 | //errorLine int // Line number of last error. 50 | //errorCount int // Number of errors. 51 | //sawCode bool // saw code in this file (as opposed to comments and blank lines) 52 | //pc int64 // virtual PC; count of Progs; doesn't advance for GLOBL or DATA. 53 | //input []lex.Token 54 | //inputPos int 55 | //pendingLabels []string // Labels to attach to next instruction. 56 | //labels map[string]*obj.Prog 57 | //toPatch []Patch 58 | //addr []obj.Addr 59 | //ctxt *obj.Link 60 | //firstProg *obj.Prog 61 | //lastProg *obj.Prog 62 | //dataAddr map[string]int64 // Most recent address for DATA for this symbol. 63 | //isJump bool // Instruction being assembled is a jump. 64 | //allowABI bool // Whether ABI selectors are allowed. 65 | //pkgPrefix string // Prefix to add to local symbols. 
66 | //errorWriter io.Writer 67 | } 68 | 69 | //func (p *Parser) prefix() (string, bool) { 70 | // var token tokens.Token 71 | // for { 72 | // token = p.lex.NextToken() 73 | // if token == tokens.EOF { 74 | // return "", false 75 | // } 76 | // if token != tokens.COMMENT { 77 | // break 78 | // } 79 | // } 80 | // 81 | // if token == tokens.IDENT { 82 | // panic(fmt.Errorf("unexpected token %s", "IDENT")) 83 | // } 84 | // 85 | // return p.lex.ident, true 86 | //} 87 | 88 | func (p *parser) _addRel(label string, relType int) { 89 | p.relocateList = append( 90 | p.relocateList, 91 | &relocate{ 92 | Label: label, // 重定位符号的名称 93 | Type: relType, // 重定位类型0-R_386_32;1-R_386_PC32 94 | Offset: p.sec.Offset, // 重定位位置的偏移 95 | Section: p.sec.Name, // 重定位目标段 96 | }, 97 | ) 98 | } 99 | 100 | // 段落切换 101 | func (p *parser) _switch(id string) { 102 | p.secList = append( 103 | p.secList, 104 | §ion{ 105 | Name: p.sec.Name, 106 | Length: p.sec.Offset, // 结束位置,也代表大小, 先不记录偏移 107 | }, 108 | ) 109 | 110 | p.sec.Name = id // 切换到下一个段 111 | p.sec.Offset = 0 // 清0段偏移 112 | } 113 | 114 | // ---------------------------------------------------------------------------------- 115 | // -- parser start 116 | 117 | func (p *parser) errorf(format string, args ...interface{}) { 118 | p.error = fmt.Errorf(format, args...) 
119 | panic(p.error) 120 | } 121 | 122 | func (p *parser) next() { 123 | p.token = p.lexer.NextToken() 124 | for p.token == COMMENT { 125 | p.token = p.lexer.NextToken() 126 | } 127 | } 128 | 129 | func (p *parser) got(token Token) bool { 130 | if p.token == token { 131 | p.next() 132 | return true 133 | } 134 | return false 135 | } 136 | 137 | func (p *parser) expect(tokens ...Token) Pos { 138 | pos := p.pos 139 | for _, tok := range tokens { 140 | if p.token == tok { 141 | p.next() 142 | return pos 143 | } 144 | } 145 | 146 | p.unexpect(tokens[0].String()) 147 | return pos 148 | } 149 | 150 | func (p *parser) unexpect(except string) { 151 | found := token.TokenLabel(p.token, p.id) 152 | p.errorf("except %s, found %s", except, found) 153 | } 154 | 155 | // defineType 处理数据定义, 同时计算符号长度 156 | func (p *parser) data(cont *[]int64, contLen *int64) { 157 | switch p.token { 158 | case IDENT: // 引用变量,变量必须已经被申明, 如果符号未定义,则记录重定位 159 | lb := p.GetLabel(p.id) 160 | if lb.Type == EQU_LABEL || lb.Type == LOCAL_LABEL { 161 | (*cont)[*contLen] = lb.Addr 162 | } else { // 未定义或非法符号, equ 做了单独处理! 
163 | p._addRel(p.id, R_386_32) 164 | } 165 | *contLen++ 166 | p.next() 167 | case INT: 168 | (*cont)[*contLen] = utils.IntBytes(p.id) 169 | *contLen++ 170 | p.next() 171 | case FLOAT: 172 | (*cont)[*contLen] = utils.FloatBytes(p.id) 173 | *contLen++ 174 | p.next() 175 | case STRING: 176 | for _, ch := range []byte(p.id) { 177 | (*cont)[*contLen] = int64(ch) 178 | *contLen++ 179 | } 180 | p.next() 181 | default: 182 | // todo 183 | //p.errorf("[valType](%d,%d): %s, %,数据类型获取异常!", p.token.Message(p.id)) 184 | } 185 | } 186 | 187 | func (p *parser) define(id string, times, size int) { 188 | lb := NewLabel(LOCAL_LABEL) 189 | lb.Times = times 190 | lb.Size = size 191 | lb.Cont = make([]int, 255) // 数据缓存 192 | lb.ContLen = 0 193 | p.data(&lb.Cont, &lb.ContLen) // 这里获得的是值, 数字、字符串、引用名 194 | 195 | // 看是否有连续定义, 例如:"hello world", 13, 10 196 | token := p.NextToken() 197 | for token == COMMA { 198 | p.data(&lb.Cont, &lb.ContLen) // 这里获得的是值, 数字、字符串、引用名 199 | token = p.NextToken() 200 | } 201 | 202 | p.ProcTable.AddLabel(id, lb) 203 | } 204 | 205 | // 以符号名称开始的语句, 数据定义,或代码段标记 206 | func (p *parser) labelDec(id string) { 207 | p.next() 208 | switch p.token { 209 | case A_TIMES: // 需要重复 210 | p.expect(INT) 211 | repeat := utils.Int(p.id) 212 | size := p.size() 213 | p.define(id, utils.Int(p.id), p.size()) // size 代表数据类型 db dd 字符、字、双字 214 | case A_EQU: // equ 常量?伪指令,所有使用到该符号的,全部替换为值,不存在地址。 215 | // 这里需要被替换为值 216 | // 关于 equ 语法说明,equ 支持表达式:可以是数字、地址、其他符号、算术表达式等 217 | // equ 定义的符号在汇编时就被替换为具体值,不会占用内存,也不会生成机器码。 218 | // 不能对 equ 定义的符号赋新值(它不是变量)。 219 | // equ 只能用于常量表达式,不能用于运行时可变的值。 220 | // todo 完整的逻辑需要支持 数字,其他符号,表达式, 【最终获得运算后的值】 221 | // todo equ 引用其它符号,必须提前申明! 
222 | p.require(NUMBER) // todo 当前只支持数字 223 | p.ProcTable.AddLabel(id, NewLabelEqu(p.number())) // 直接添加符号 224 | case COLON: // 代码段(label), main: 一般是函数名作为一个单独的记号 225 | p.ProcTable.AddLabel(id, NewLabelText()) // 作为一个段符号 226 | default: // 变量支持 227 | p.Lexer.Back(token) // db, dd, dw // 退回去重新读 p.size() 228 | 229 | p.values(id, 1, p.size()) // 单个变量定义,直接解析 230 | } 231 | } 232 | 233 | func (p *parser) dataList() (*ast.File, error) { 234 | p.next() // 跳过.data 235 | // 解析数据段内容 236 | for p.token > _literal { 237 | switch p.token { 238 | case ".byte", ".word", ".long", ".quad", ".float", ".double", A_STRING: 239 | // 解析数据定义伪指令 240 | decl := p.parseDataDirective() 241 | if decl != nil { 242 | p.declList = append(p.declList, decl) 243 | } 244 | case A_REPT: 245 | // 解析重复定义 246 | decl := p.parseReptDirective() 247 | if decl != nil { 248 | p.declList = append(p.declList, decl) 249 | } 250 | case IDENT: 251 | // 解析标签定义 252 | p.declList = append(p.declList, p.labelDec(p.id)) 253 | case A_GLB: // 全局符号定义 254 | p.require(IDENT) 255 | // 添加到全局符号表 256 | p.ProcTable.AddLabel(p.id, NewLabelGlobal()) 257 | default: 258 | p.errorf("unexpected token in data section: %s", p.token) 259 | return nil, p.error 260 | } 261 | p.next() 262 | } 263 | } 264 | 265 | func (p *parser) ParseFile() (*ast.File, error) { 266 | if p.error != nil { 267 | return nil, p.error 268 | } 269 | 270 | p.next() 271 | 272 | for p.token > _literal { 273 | switch p.token { 274 | case A_DATA: // 数据段定义 275 | p.dataList() 276 | case A_TEXT: // 代码段定义 277 | p.next() // 跳过.text 278 | // 解析代码段内容 279 | for p.token > _literal { 280 | switch p.token { 281 | case IDENT: 282 | p.declList = append(p.declList, p.labelDec(p.id)) 283 | default: 284 | p.inst(p.token) // 解析指令 285 | } 286 | p.next() 287 | } 288 | case IDENT: // 两种情况,段落定义,变量定义 289 | p.declList = append(p.declList, p.labelDec(p.id)) 290 | case A_SEC: // 段定义 291 | p.require(IDENT) 292 | p._switch(p.id) // 切换到新的段 293 | case A_GLB: // 全局符号定义 294 | p.require(IDENT) 295 | // 
添加到全局符号表 296 | p.ProcTable.AddLabel(p.id, NewLabelGlobal()) 297 | default: 298 | p.inst(p.token) // 解析指令 299 | } 300 | 301 | p.next() 302 | } 303 | 304 | p._switch("") // 结束最后一个段 305 | 306 | return &ast.File{ 307 | Decls: p.declList, 308 | }, nil 309 | } 310 | 311 | // ExportLb 导出符号表 312 | //func (proc *parser) ExportLb() { 313 | // for _, lb := range proc.MapLabel { 314 | // if !lb.IsEqu { // EQU定义的符号不导出 315 | // ObjFile.addSym(lb) 316 | // } 317 | // } 318 | //} 319 | 320 | //func (proc *parser) WriteData(file *os.File) { 321 | // for _, lb := range proc.DefLabelList { 322 | // lb.write(file) 323 | // } 324 | //} 325 | 326 | //// Codegen 代码生成, 生成代码,同时记录每个段的大小 327 | //func (proc *parser) Codegen() error { 328 | // // 源码扫描完成,开始生成代码, 内部符号已存在 329 | // instrBuffer := bytes.NewBuffer(nil) 330 | // for _, instr := range proc.InstrList { 331 | // instr.WriteOut(instrBuffer, &proc.seg.Offset) 332 | // } 333 | // instrBuffer.Len() // 代码段大小 334 | // 335 | // // important 符号表[可能]存在符号嵌套引用 336 | // // 但是所有嵌套引用,被引用的符号必须被声明 337 | // // 如果引用外部符号,记录地址, 下一个引用符合也只引用所在地址信息 338 | // // 逻辑上无需处理嵌套 339 | // for _, label := range proc.labelList { 340 | // 341 | // } 342 | // //for _, instr := range proc.InstrList { 343 | // // instr.WriteOut(instrBuffer, &proc.seg.Offset) 344 | // //} 345 | // 346 | //} 347 | 348 | //func Check(src, dest []byte, name string) { 349 | // for i, ch := range src { 350 | // if len(dest) <= i { 351 | // fmt.Printf("错误:[0x%X], 两文件内容长度不一致: [%d, %d]", i, len(src), len(dest)) 352 | // return 353 | // } 354 | // if ch != dest[i] { 355 | // fmt.Printf("错误:[0x%X, %d](%X(%d) != %X(%d))", i, i, ch, ch, dest[i], dest[i]) 356 | // 357 | // for i2, b := range src[i-4 : i+16] { 358 | // fmt.Printf("%d: [%d, %d] \n", i+i2-4, b, dest[i+i2-4]) 359 | // } 360 | // fmt.Printf("\n") 361 | // return 362 | // } 363 | // } 364 | // 365 | // fmt.Printf("校验完成,[%s]完全一致!\n", name) 366 | //} 367 | 368 | /* 369 | * 370 | .section .name 371 | .global main # 定义全局符号,使符号对其他文件可见 372 | .local 
local_func # 定义局部符号,仅在当前文件可见 373 | .type main, @function # 定义符号类型,@function表示这是一个函数 374 | .size main, .-main # 定义符号大小,.-main表示从当前位置到main标签的距离 375 | */ 376 | func (p *Parser) pseudo(word string, args []LineToken) *Program { 377 | switch word { 378 | case ".section": // 分段 379 | 380 | case ".global": 381 | 382 | case ".local": 383 | 384 | case ".type": 385 | 386 | case ".size": 387 | 388 | case ".align": 389 | 390 | case "DATA": 391 | p.asmData(operands) 392 | case "FUNCDATA": 393 | p.asmFuncData(operands) 394 | case "GLOBL": 395 | p.asmGlobl(operands) 396 | case "PCDATA": 397 | p.asmPCData(operands) 398 | case "PCALIGN": 399 | p.asmPCAlign(operands) 400 | case "TEXT": 401 | p.asmText(operands) // 函数申明 402 | default: // 处理符号声明 403 | if len(args) > 0 && args[0].LiteralVal == ":" { 404 | // 说明是符号 405 | } 406 | return false 407 | } 408 | return true 409 | } 410 | 411 | // asmText assembles a TEXT pseudo-op. 412 | // TEXT runtime·sigtramp(SB),4,$0-0 413 | func (p *Parser) asmText(operands [][]lex.Token) { // 记录一个函数到代码段 414 | if len(operands) != 2 && len(operands) != 3 { // 参数至少是,2个或者,,3个 415 | p.errorf("expect two or three operands for TEXT") 416 | return 417 | } 418 | 419 | // Labels are function scoped. Patch existing labels and 420 | // create a new label space for this TEXT. 421 | p.patch() // todo, 多次被调用 422 | p.labels = make(map[string]*obj.Prog) // 每次都初始化? 423 | 424 | // Operand 0 is the symbol name in the form foo(SB). 425 | // That means symbol plus indirect on SB and no offset. 426 | nameAddr := p.address(operands[0]) // 计算地址? 427 | if !p.validSymbol("TEXT", &nameAddr, false) { 428 | return 429 | } 430 | name := symbolName(&nameAddr) 431 | next := 1 432 | 433 | // Next operand is the optional text flag, a literal integer. 434 | var flag = int64(0) 435 | if len(operands) == 3 { 436 | flag = p.evalInteger("TEXT", operands[1]) 437 | next++ 438 | } 439 | 440 | // Issue an error if we see a function defined as ABIInternal 441 | // without NOSPLIT. 
In ABIInternal, obj needs to know the function 442 | // signature in order to construct the morestack path, so this 443 | // currently isn't supported for asm functions. 444 | if nameAddr.Sym.ABI() == obj.ABIInternal && flag&obj.NOSPLIT == 0 { 445 | p.errorf("TEXT %q: ABIInternal requires NOSPLIT", name) 446 | } 447 | 448 | // Next operand is the frame and arg size. 449 | // Bizarre syntax: $frameSize-argSize is two words, not subtraction. 450 | // Both frameSize and argSize must be simple integers; only frameSize 451 | // can be negative. 452 | // The "-argSize" may be missing; if so, set it to objabi.ArgsSizeUnknown. 453 | // Parse left to right. 454 | op := operands[next] 455 | if len(op) < 2 || op[0].ScanToken != '$' { 456 | p.errorf("TEXT %s: frame size must be an immediate constant", name) 457 | return 458 | } 459 | op = op[1:] 460 | negative := false 461 | if op[0].ScanToken == '-' { 462 | negative = true 463 | op = op[1:] 464 | } 465 | if len(op) == 0 || op[0].ScanToken != scanner.Int { 466 | p.errorf("TEXT %s: frame size must be an immediate constant", name) 467 | return 468 | } 469 | frameSize := p.positiveAtoi(op[0].String()) 470 | if negative { 471 | frameSize = -frameSize 472 | } 473 | op = op[1:] 474 | argSize := int64(abi.ArgsSizeUnknown) 475 | if len(op) > 0 { 476 | // There is an argument size. It must be a minus sign followed by a non-negative integer literal. 477 | if len(op) != 2 || op[0].ScanToken != '-' || op[1].ScanToken != scanner.Int { 478 | p.errorf("TEXT %s: argument size must be of form -integer", name) 479 | return 480 | } 481 | argSize = p.positiveAtoi(op[1].String()) 482 | } 483 | p.ctxt.InitTextSym(nameAddr.Sym, int(flag), p.pos()) 484 | prog := &obj.Prog{ 485 | Ctxt: p.ctxt, 486 | As: obj.ATEXT, 487 | Pos: p.pos(), 488 | From: nameAddr, 489 | To: obj.Addr{ 490 | Type: obj.TYPE_TEXTSIZE, 491 | Offset: frameSize, 492 | // Argsize set below. 
493 | }, 494 | } 495 | nameAddr.Sym.Func().Text = prog 496 | prog.To.Val = int32(argSize) 497 | p.append(prog, "", true) // 添加一个代码段? 498 | } 499 | 500 | // asmData assembles a DATA pseudo-op. 501 | // DATA masks<>+0x00(SB)/4, $0x00000000 502 | func (p *Parser) asmData(operands [][]lex.Token) { // 记录一条数据到数据段 503 | if len(operands) != 2 { 504 | p.errorf("expect two operands for DATA") 505 | return 506 | } 507 | 508 | // Operand 0 has the general form foo<>+0x04(SB)/4. 509 | op := operands[0] 510 | n := len(op) 511 | if n < 3 || op[n-2].ScanToken != '/' || op[n-1].ScanToken != scanner.Int { 512 | p.errorf("expect /size for DATA argument") 513 | return 514 | } 515 | szop := op[n-1].String() 516 | sz, err := strconv.Atoi(szop) 517 | if err != nil { 518 | p.errorf("bad size for DATA argument: %q", szop) 519 | } 520 | op = op[:n-2] 521 | nameAddr := p.address(op) 522 | if !p.validSymbol("DATA", &nameAddr, true) { 523 | return 524 | } 525 | name := symbolName(&nameAddr) 526 | 527 | // Operand 1 is an immediate constant or address. 528 | valueAddr := p.address(operands[1]) 529 | switch valueAddr.Type { 530 | case obj.TYPE_CONST, obj.TYPE_FCONST, obj.TYPE_SCONST, obj.TYPE_ADDR: 531 | // OK 532 | default: 533 | p.errorf("DATA value must be an immediate constant or address") 534 | return 535 | } 536 | 537 | // The addresses must not overlap. Easiest test: require monotonicity. 
538 | if lastAddr, ok := p.dataAddr[name]; ok && nameAddr.Offset < lastAddr { 539 | p.errorf("overlapping DATA entry for %s", name) 540 | return 541 | } 542 | p.dataAddr[name] = nameAddr.Offset + int64(sz) 543 | 544 | switch valueAddr.Type { 545 | case obj.TYPE_CONST: 546 | switch sz { 547 | case 1, 2, 4, 8: 548 | nameAddr.Sym.WriteInt(p.ctxt, nameAddr.Offset, int(sz), valueAddr.Offset) 549 | default: 550 | p.errorf("bad int size for DATA argument: %d", sz) 551 | } 552 | case obj.TYPE_FCONST: 553 | switch sz { 554 | case 4: 555 | nameAddr.Sym.WriteFloat32(p.ctxt, nameAddr.Offset, float32(valueAddr.Val.(float64))) 556 | case 8: 557 | nameAddr.Sym.WriteFloat64(p.ctxt, nameAddr.Offset, valueAddr.Val.(float64)) 558 | default: 559 | p.errorf("bad float size for DATA argument: %d", sz) 560 | } 561 | case obj.TYPE_SCONST: 562 | nameAddr.Sym.WriteString(p.ctxt, nameAddr.Offset, int(sz), valueAddr.Val.(string)) 563 | case obj.TYPE_ADDR: 564 | if sz == p.arch.PtrSize { 565 | nameAddr.Sym.WriteAddr(p.ctxt, nameAddr.Offset, int(sz), valueAddr.Sym, valueAddr.Offset) 566 | } else { 567 | p.errorf("bad addr size for DATA argument: %d", sz) 568 | } 569 | } 570 | } 571 | 572 | func (p *parser) pseudo() bool { 573 | 574 | } 575 | 576 | func (p *Parser) Parse() *Program { 577 | scratch := make([][]lex.Token, 0, 3) 578 | for { 579 | word, cond, operands, ok := p.line(scratch) // operands = scratch 一维数组为每个参数, 逗号分割, 二维数组是具体的符号和 ident 两种 580 | if !ok { 581 | break 582 | } 583 | scratch = operands 584 | 585 | if p.pseudo(word, operands) { // 处理伪指令,段落、符号定义 DATA TEXT 586 | continue 587 | } 588 | i, present := p.arch.Instructions[word] // 这里取指令操作码 589 | if present { 590 | p.instruction(i, word, cond, operands) // 最重要!处理指令 591 | continue 592 | } 593 | p.errorf("unrecognized instruction %q", word) 594 | } 595 | if p.errorCount > 0 { 596 | return nil, false 597 | } 598 | p.patch() // todo 不知道用途 可能跟标签有关 599 | return p.firstProg, true 600 | } 601 | 602 | func NewParser(lex *lexer) *Parser { 603 | 
return &Parser{
	lex:         lex,
	labels:      make(map[string]*obj.Prog),
	dataAddr:    make(map[string]int64),
	errorWriter: os.Stderr,
	allowABI:    ctxt != nil && objabi.LookupPkgSpecial(ctxt.Pkgpath).AllowAsmABI,
	pkgPrefix:   pkgPrefix,
}
}

// parseDataDirective dispatches on the current token to the parser for the
// matching data-definition pseudo-instruction and returns the declaration
// that parser produced. An unrecognised directive is reported through
// p.errorf and yields nil.
func (p *parser) parseDataDirective() *ast.GenDecl {
	switch p.token {
	case ".byte":
		return p.parseByteDirective()
	case ".word":
		return p.parseWordDirective()
	case ".long":
		return p.parseLongDirective()
	case ".quad": // listed once; a second identical case would be unreachable
		return p.parseQuadDirective()
	case ".float", ".single":
		// NOTE(review): routed to the .quad parser exactly as before; this
		// looks like a placeholder until a float-specific parser exists.
		return p.parseQuadDirective()
	case ".double":
		// NOTE(review): same placeholder routing as .float - confirm.
		return p.parseQuadDirective()
	case ".ascii":
		return p.parseAsciiDirective()
	case ".asciz":
		return p.parseAscizDirective()
	case ".string":
		return p.parseStringDirective()
	case ".rept":
		return p.parseReptDirective()
	default:
		p.errorf("unknown data directive: %s", p.token)
		return nil
	}
}

// parseByteDirective parses the .byte pseudo-instruction.
func (p *parser) parseByteDirective() *ast.GenDecl {
	decl := &ast.GenDecl{
		Tok: token.DATA,
	}

	p.next() // skip .byte

	// parse the value list
	for {
		switch p.token {
		case INT:
			// integer value
			val := utils.Int(p.id)
			decl.Specs = append(decl.Specs, &ast.ValueSpec{
				Type: &ast.Ident{Name: "byte"},
				Values: []ast.Expr{&ast.BasicLit{
					Kind:  token.INT,
					Value: strconv.FormatInt(val, 10),
				}},
			})
		case STRING:
			// string value, emitted one byte at a time
			for _, ch := range []byte(p.id) {
				decl.Specs = append(decl.Specs, &ast.ValueSpec{
					Type: &ast.Ident{Name: "byte"},
					Values: []ast.Expr{&ast.BasicLit{
						Kind: token.INT,
672 | Value: strconv.FormatInt(int64(ch), 10), 673 | }}, 674 | }) 675 | } 676 | case IDENT: 677 | // 解析符号引用 678 | decl.Specs = append(decl.Specs, &ast.ValueSpec{ 679 | Type: &ast.Ident{Name: "byte"}, 680 | Values: []ast.Expr{&ast.Ident{ 681 | Name: p.id, 682 | }}, 683 | }) 684 | default: 685 | p.errorf("invalid value in .byte directive") 686 | return nil 687 | } 688 | 689 | p.next() 690 | if p.token != COMMA { 691 | break 692 | } 693 | p.next() 694 | } 695 | 696 | return decl 697 | } 698 | 699 | // parseAsciiDirective 解析.ascii伪指令 700 | func (p *parser) parseAsciiDirective() *ast.GenDecl { 701 | decl := &ast.GenDecl{ 702 | Tok: token.DATA, 703 | } 704 | 705 | p.next() // 跳过.ascii 706 | 707 | if p.token != STRING { 708 | p.errorf("expected string literal after .ascii") 709 | return nil 710 | } 711 | 712 | // 将字符串转换为字节数组 713 | for _, ch := range []byte(p.id) { 714 | decl.Specs = append(decl.Specs, &ast.ValueSpec{ 715 | Type: &ast.Ident{Name: "byte"}, 716 | Values: []ast.Expr{&ast.BasicLit{ 717 | Kind: token.INT, 718 | Value: strconv.FormatInt(int64(ch), 10), 719 | }}, 720 | }) 721 | } 722 | 723 | p.next() 724 | return decl 725 | } 726 | 727 | // parseAscizDirective 解析.asciz伪指令 728 | func (p *parser) parseAscizDirective() *ast.GenDecl { 729 | decl := p.parseAsciiDirective() 730 | if decl == nil { -------------------------------------------------------------------------------- /compiler/compile/parser/parser_exp.go: -------------------------------------------------------------------------------- 1 | package parser 2 | 3 | import ( 4 | "github.com/facelang/face/compiler/compile/ast" 5 | "github.com/facelang/face/compiler/compile/token" 6 | ) 7 | 8 | // maxNestLev is the deepest we're willing to recurse during parsing 9 | const maxNestLev int = 1e5 10 | 11 | func incNestLev(p *parser) *parser { 12 | p.nestLev++ 13 | if p.nestLev > maxNestLev { 14 | p.error(p.pos, "exceeded max nesting depth") 15 | } 16 | return p 17 | } 18 | 19 | // decNestLev is used to track nesting 
depth during parsing to prevent stack exhaustion. 20 | // It is used along with incNestLev in a similar fashion to how un and trace are used. 21 | func decNestLev(p *parser) { 22 | p.nestLev-- 23 | } 24 | 25 | // ---------------------------------------------------------------------------- 26 | // Common productions 27 | 28 | // inRhs = true 代表右侧表达式,否则为左侧表达式 29 | func exprList(p *parser, inRhs bool) []ast.Expr { 30 | old := p.inRhs 31 | p.inRhs = inRhs 32 | 33 | list := []ast.Expr{expr(p)} 34 | for p.token == token.COMMA { 35 | p.next() 36 | list = append(list, expr(p)) 37 | } 38 | 39 | p.inRhs = old 40 | return list 41 | } 42 | 43 | // ---------------------------------------------------------------------------- 44 | // Expressions 45 | 46 | //func (p *parser) parseFuncTypeOrLit() ast.Expr { 47 | // 48 | // typ := p.parseFuncType() 49 | // if p.token != tokens.LBRACE { 50 | // // function type only 51 | // return typ 52 | // } 53 | // 54 | // p.exprLev++ 55 | // body := p.parseBody() 56 | // p.exprLev-- 57 | // 58 | // return &ast.FuncLit{Type: typ, Body: body} 59 | //} 60 | 61 | // operand may return an expression or a raw type (incl. array 62 | // types of the form [...]T). Callers must verify the result. 63 | func operand(p *parser) ast.Expr { 64 | switch p.token { 65 | case token.IDENT: // 变量符号 66 | x := p.name() 67 | return x 68 | 69 | case token.INT, token.FLOAT, token.IMAG, token.CHAR, token.STRING: // 值类型 70 | x := &ast.BasicLit{Pos: 0, Kind: p.token, Value: p.identifier} 71 | p.next() 72 | return x 73 | 74 | case token.LPAREN: // (...) 多了一层优先级 75 | lparen := p.pos 76 | p.next() 77 | //p.exprLev++ 78 | x := exprRhs(p) // types may be parenthesized: (some type) 79 | //p.exprLev-- 80 | rparen := p.expect(token.RPAREN) 81 | return &ast.ParenExpr{Lparen: lparen, X: x, Rparen: rparen} 82 | 83 | //case tokens.FUNC: // func ... 
84 | // return p.parseFuncTypeOrLit() // todo 暂时忽略 85 | } 86 | 87 | // 上面都是具体值类型 88 | // 下面是数据类型、关键字一类 89 | 90 | // 类型转换 int(123), []string{"a", "b", "c"} 91 | if typ := p.tryIdentOrType(); typ != nil { // do not consume trailing type parameters 92 | // could be type for composite literal or conversion 93 | if _, isIdent := typ.(*ast.Name); !isIdent { 94 | p.error(p.pos, "type cannot be identifier") 95 | } 96 | return typ 97 | } 98 | 99 | // we have an error 100 | pos := p.pos 101 | p.unexpect("operand") 102 | return &ast.BadExpr{From: pos, To: p.pos} 103 | } 104 | 105 | // 只在 parseElement 被调用 106 | func (p *parser) parseValue() ast.Expr { 107 | if p.token == token.LBRACE { 108 | return p.parseLiteralValue(nil) 109 | } 110 | 111 | return expr(p) 112 | } 113 | 114 | // 只在 parseElementList 被调用 115 | func (p *parser) parseElement() ast.Expr { 116 | x := p.parseValue() 117 | if p.token == token.COLON { 118 | colon := p.pos 119 | p.next() 120 | x = &ast.KeyValueExpr{Key: x, Colon: colon, Value: p.parseValue()} 121 | } 122 | 123 | return x 124 | } 125 | 126 | func (p *parser) parseElementList() (list []ast.Expr) { 127 | for p.token != token.RBRACE && p.token != token.EOF { 128 | list = append(list, p.parseElement()) 129 | if p.token != token.COMMA { 130 | break 131 | } 132 | p.next() 133 | } 134 | 135 | return 136 | } 137 | 138 | // 解析复合字面量, {1, 2, 3} {key: value} 类型 139 | func (p *parser) parseLiteralValue(typ ast.Expr) ast.Expr { 140 | defer decNestLev(incNestLev(p)) 141 | 142 | lbrace := p.expect(token.LBRACE) 143 | var elts []ast.Expr 144 | //p.exprLev++ 145 | if p.token != token.RBRACE { 146 | elts = p.parseElementList() 147 | } 148 | //p.exprLev-- 149 | rbrace := p.expect(token.RBRACE) 150 | return &ast.CompositeLit{Type: typ, Lbrace: lbrace, Elts: elts, Rbrace: rbrace} 151 | } 152 | 153 | // packIndexExpr returns an IndexExpr x[expr0] or IndexListExpr x[expr0, ...]. 
func packIndexExpr(x ast.Expr, lbrack token.Pos, exprs []ast.Expr, rbrack token.Pos) ast.Expr {
	switch len(exprs) {
	case 0:
		panic("internal error: packIndexExpr with empty expr slice")
	case 1:
		return &ast.IndexExpr{
			X:      x,
			Lbrack: lbrack,
			Index:  exprs[0],
			Rbrack: rbrack,
		}
	default:
		return &ast.IndexListExpr{
			X:       x,
			Lbrack:  lbrack,
			Indices: exprs,
			Rbrack:  rbrack,
		}
	}
}

// parseIndexOrSliceOrInstance parses the bracketed suffix that follows x:
// an index x[i], a slice x[lo:hi] / x[lo:hi:max], or an instance x[T1, T2].
// The opening '[' is consumed here.
func (p *parser) parseIndexOrSliceOrInstance(x ast.Expr) ast.Expr {
	lbrack := p.expect(token.LBRACK)
	if p.token == token.RBRACK { // closes immediately: report the empty operand
		p.unexpect("[operand is empty]")
		rbrack := p.pos
		p.next()
		return &ast.IndexExpr{
			X:      x,
			Lbrack: lbrack,
			Index:  &ast.BadExpr{From: rbrack, To: rbrack},
			Rbrack: rbrack,
		}
	}
	//p.exprLev++

	const N = 3 // [index] [:] [::]
	var args []ast.Expr // instance arguments, e.g. [1, 2, 3]
	var index [N]ast.Expr
	var colons [N - 1]token.Pos
	if p.token != token.COLON {
		index[0] = exprRhs(p)
	}
	ncolons := 0
	switch p.token {
	case token.COLON:
		// slice expression
		for p.token == token.COLON && ncolons < len(colons) {
			colons[ncolons] = p.pos
			ncolons++
			p.next()
			if p.token != token.COLON && p.token != token.RBRACK && p.token != token.EOF {
				index[ncolons] = exprRhs(p)
			}
		}
	case token.COMMA: // ,
		// instance expression
		args = append(args, index[0])
		for p.token == token.COMMA {
			p.next()
			if p.token != token.RBRACK && p.token != token.EOF {
				args = append(args, p.parseType())
			}
		}
	}

	// p.exprLev--
	rbrack := p.expect(token.RBRACK)

	if ncolons > 0 { // slice form
		// slice expression
		slice3 := false
		if ncolons == 2 {
			slice3 = true
			// Check presence of middle and final index here rather than during type-checking
			// to prevent erroneous programs from passing through gofmt (was go.dev/issue/7305).
			if index[1] == nil {
				p.error(colons[0], "middle index required in 3-index slice")
				index[1] = &ast.BadExpr{From: colons[0] + 1, To: colons[1]}
			}
			if index[2] == nil {
				p.error(colons[1], "final index required in 3-index slice")
				index[2] = &ast.BadExpr{From: colons[1] + 1, To: rbrack}
			}
		}
		return &ast.SliceExpr{X: x, Lbrack: lbrack, Low: index[0], High: index[1], Max: index[2], Slice3: slice3, Rbrack: rbrack}
	}

	if len(args) == 0 {
		// index expression
		return &ast.IndexExpr{X: x, Lbrack: lbrack, Index: index[0], Rbrack: rbrack}
	}

	// instance expression
	return packIndexExpr(x, lbrack, args, rbrack)
}

// funcCall parses the parenthesized argument list of a call to fun.
// A type conversion is parsed the same way, since it has the shape of a call.
// Parsing of arguments stops after an argument followed by "...".
func (p *parser) funcCall(fun ast.Expr) *ast.CallExpr {
	lparen := p.expect(token.LPAREN) // opening parenthesis
	//p.exprLev++
	var list []ast.Expr
	var ellipsis token.Pos
	for p.token != token.RPAREN && p.token != token.EOF && !ellipsis.IsValid() {
		list = append(list, exprRhs(p)) // builtins may expect a type: make(some type, ...)
		if p.token == token.ELLIPSIS {
			ellipsis = p.pos
			p.next()
		}

		// a comma continues with the next argument; anything else ends the list
		if p.token != token.COMMA {
			break
		}
		p.next()
	}
	//p.exprLev--
	rparen := p.expect(token.RPAREN) // closing parenthesis

	return &ast.CallExpr{Fun: fun, Lparen: lparen, Args: list, Ellipsis: ellipsis, Rparen: rparen}
}

// primaryExpr parses postfix expressions such as x.name, x[123], x(...) and
// x{...}. When x is nil the leading operand is parsed first. The loop keeps
// applying suffixes until the current token cannot continue the expression.
func primaryExpr(p *parser, x ast.Expr) ast.Expr {
	if x == nil {
		x = operand(p)
	}

	// n only counts loop iterations; the nesting-level bookkeeping that would
	// consume it is commented out below.
	var n int
	//defer func() { p.nestLev -= n }()
	for n = 1; ; n++ { // incremented on every suffix
		//incNestLev(p)
		switch p.token {
		case token.PERIOD: // x. must be followed by an identifier
			p.next()
			x = &ast.SelectorExpr{X: x, Sel: p.name()}
		case token.LBRACK: // x[...], x[1], x[:]
			x = p.parseIndexOrSliceOrInstance(x) // todo
		case token.LPAREN: // x(...), function call or type conversion
			x = p.funcCall(x)
		case token.LBRACE: // todo: clarify what {} means here
			// operand may have returned a parenthesized complit
			// type; accept it but complain if we have a complit
			t := ast.Unparen(x) // strip parentheses to look at the underlying node
			// determine if '{' belongs to a composite literal or a block statement
			switch t.(type) { // unlisted node kinds return x unchanged; listed ones fall through to literal parsing
			case *ast.BadExpr, *ast.Name, *ast.SelectorExpr: // conditional in go/parser; unconditional here
				//if p.exprLev < 0 { // some parse paths set exprLev = -1
				//	return x
				//}
				// x is possibly a composite literal type
			case *ast.IndexExpr, *ast.IndexListExpr: // conditional in go/parser; unconditional here
				//if p.exprLev < 0 {
				//	return x
				//}
				// x is possibly a composite literal type
			case *ast.ArrayType, *ast.StructType, *ast.MapType:
				// x is a composite literal type
				// arrays, structs and maps are parsed directly
			default:
				return x
			}
			if t != x {
				p.error(t.Position(), "cannot parenthesize type in composite literal")
				// already progressed, no need to advance
			}
			x = p.parseLiteralValue(x) // todo: already implemented, may be unnecessary
		default:
			return x
		}
	}
}

// unaryExpr parses a unary expression. The prefix operators accepted by the
// switch below are + - ! ^ & ~ ; anything else is handed to primaryExpr.
func unaryExpr(p *parser) ast.Expr {
	defer decNestLev(incNestLev(p))

	switch p.token {
	case token.ADD, token.SUB, token.NOT, token.XOR, token.AND, token.TILDE: // +, -, !, ^, &, ~
		pos, op := p.pos, p.token
		p.next()
		x := unaryExpr(p) // the operand may itself carry a unary operator
		return &ast.UnaryExpr{OpPos: pos, Op: op, X: x}
	}

	return primaryExpr(p, nil) // next lower level of the grammar
}

// precedence returns the current token and its precedence.
// Special case: inside a right-hand-side expression an assignment token is
// treated as ==.
func precedence(p *parser) (token.Token, int) {
	tok := p.token
	if p.inRhs && tok == token.ASSIGN {
		tok = token.EQL
	}
	return tok, tok.Precedence() // precedence of the (possibly substituted) token
}

// binaryExpr parses a binary expression whose operators all have precedence
// of at least prec1. When x is nil the left operand is parsed first.
func binaryExpr(p *parser, x ast.Expr, prec1 int) ast.Expr {
	if x == nil { // nil on the initial call from expr and on every recursive call
		x = unaryExpr(p) // start with a unary expression
	}

	// n counts the incNestLev calls made by the loop so the deferred function
	// can undo all of them on return.
	var n int
	defer func() { p.nestLev -= n }()
	for n = 1; ; n++ {
		incNestLev(p)
		// compare precedences
		op, oprec := precedence(p)
		if oprec < prec1 { // the recursive call passes oprec+1, so an equal-precedence operator ends it
			return x
		}
		pos := p.expect(op)
		y := binaryExpr(p, nil, oprec+1) // precedence +1: same-precedence operators return to this loop
		x = &ast.BinaryExpr{X: x, OpPos: pos, Op: op, Y: y}
	}
}

// exprRhs parses an expression with p.inRhs forced to true and restores the
// previous value afterwards.
func exprRhs(p *parser) ast.Expr {
	old := p.inRhs
	p.inRhs = true
	x := expr(p)
	p.inRhs = old
	return x
}

// The result may be a type or even a raw type ([...]int).
// Call chain: expr() -> binaryExpr() -> unaryExpr() -> primaryExpr() -> operand()
// Outermost to innermost: binary operators, then unary operators, then
// postfix operators, then operands. Binary operators are further ordered by
// their own precedence.
func expr(p *parser) ast.Expr {
	return binaryExpr(p, nil, token.LowestPrec+1) // lowest precedence?
386 | } 387 | // field is the parser-local (name, type) pair collected for one parameter before the list is converted to []*ast.Field; either member may be nil. 388 | type field struct { 389 | name *ast.Name 390 | typ ast.Expr 391 | } 392 | // parseDotsType expects a "..." token, parses the type that follows, and returns both as an *ast.Ellipsis. 393 | func (p *parser) parseDotsType() *ast.Ellipsis { 394 | pos := p.expect(token.ELLIPSIS) 395 | elt := p.parseType() 396 | 397 | return &ast.Ellipsis{Ellipsis: pos, Elt: elt} 398 | } 399 | 400 | // parseParamDecl parses a single parameter declaration. name is an already-parsed parameter name (nil in most calls); typeSetsOK enables the "~T" and "A | B" type-set forms (usually false). 401 | func (p *parser) parseParamDecl(name *ast.Name, typeSetsOK bool) (f field) { 402 | 403 | ptok := p.token 404 | if name != nil { // a name was supplied by the caller 405 | p.token = token.IDENT // force tokens.IDENT case in switch below 406 | } else if typeSetsOK && p.token == token.TILDE { 407 | // "~" ... 408 | return field{nil, p.embeddedElem(nil)} 409 | } 410 | 411 | switch p.token { // dispatch on the current token 412 | case token.IDENT: 413 | // name 414 | if name != nil { 415 | f.name = name 416 | p.token = ptok // restore the saved token so the type can be parsed next 417 | } else { 418 | f.name = p.name() // parse the parameter name 419 | } 420 | switch p.token { // dispatch again on the token after the name 421 | case token.IDENT, token.MUL, token.ARROW, token.FUNC, token.CHAN, token.MAP, token.STRUCT, token.INTERFACE, token.LPAREN: 422 | // name type 423 | f.typ = p.parseType() // parse the type 424 | 425 | case token.LBRACK: // "[": array type or type instance 426 | // name "[" type1, ..., typeN "]" or name "[" n "]" type 427 | f.name, f.typ = p.parseArrayFieldOrTypeInstance(f.name) 428 | 429 | case token.ELLIPSIS: // "...": variadic parameter 430 | // name "..." type 431 | f.typ = p.parseDotsType() 432 | return // don't allow ...type "|" ... 433 | 434 | case token.PERIOD: // ".": selector name.xxx; always treated as a type, never as a parameter name 435 | // name "." ... 
436 | f.typ = p.parseQualifiedIdent(f.name) 437 | f.name = nil 438 | 439 | case token.TILDE: // "~": type constraint 440 | if typeSetsOK { 441 | f.typ = p.embeddedElem(nil) 442 | return 443 | } 444 | 445 | case token.OR: // "|": type constraint 446 | if typeSetsOK { 447 | // name "|" typeset 448 | f.typ = p.embeddedElem(f.name) 449 | f.name = nil 450 | return 451 | } 452 | } 453 | 454 | case token.MUL, token.ARROW, token.FUNC, token.LBRACK, token.CHAN, token.MAP, token.STRUCT, token.INTERFACE, token.LPAREN: 455 | // type 456 | f.typ = p.parseType() 457 | 458 | case token.ELLIPSIS: 459 | // "..." type 460 | // (always accepted) 461 | f.typ = p.parseDotsType() 462 | return // don't allow ...type "|" ... 463 | 464 | default: 465 | // TODO(rfindley): this is incorrect in the case of type parameter lists 466 | // (should be "']'" in that case) 467 | p.unexpect("')'") 468 | } 469 | 470 | // [name] type "|" 471 | if typeSetsOK && p.token == token.OR && f.typ != nil { 472 | f.typ = p.embeddedElem(f.typ) 473 | } 474 | 475 | return 476 | } 477 | 478 | // parseParameterList parses a comma-separated parameter list up to, but not including, closing. Per the original notes: most callers pass name0 == nil and typ0 == nil with closing "]" or ")"; 479 | // parseMethodSpec passes name0 != nil and typ0 == nil with closing "]"; 480 | // parseGenericType passes both name0 and typ0 non-nil with closing "]". 481 | func (p *parser) parseParameterList(name0 *ast.Name, typ0 ast.Expr, closing token.Token) (params []*ast.Field) { 482 | // Type parameters are the only parameter list closed by ']'. 483 | tparams := closing == token.RBRACK // true when parsing a type parameter list 484 | 485 | pos0 := p.pos 486 | if name0 != nil { 487 | pos0 = name0.Position() 488 | } else if typ0 != nil { 489 | pos0 = typ0.Position() 490 | } 491 | 492 | // Note: The code below matches the corresponding code in the syntax 493 | // parser closely. Changes must be reflected in either parser. 494 | // For the code to match, we use the local []field list that 495 | // corresponds to []syntax.Field. At the end, the list must be 496 | // converted into an []*ast.Field. 
497 | 498 | var list []field 499 | var named int // number of parameters that have an explicit name and type 500 | var typed int // number of parameters that have an explicit type 501 | 502 | // Keep parsing while a pre-parsed first name is still pending, or while the 503 | // current token is neither the closing token nor EOF. 504 | for name0 != nil || p.token != closing && p.token != token.EOF { 505 | var par field 506 | if typ0 != nil { // a pre-parsed first type was supplied (generic-type case) 507 | if tparams { 508 | typ0 = p.embeddedElem(typ0) 509 | } 510 | par = field{name0, typ0} 511 | } else { // main path: parse one parameter declaration 512 | par = p.parseParamDecl(name0, tparams) // name0 may be nil 513 | } 514 | name0 = nil // 1st name was consumed if present 515 | typ0 = nil // 1st typ was consumed if present 516 | if par.name != nil || par.typ != nil { // something was parsed: record it and update the named/typed counters 517 | list = append(list, par) 518 | if par.name != nil && par.typ != nil { 519 | named++ 520 | } 521 | if par.typ != nil { // counted even when the parameter has no name 522 | typed++ 523 | } 524 | // note: a lone identifier such as "int" arrives here with par.name set and par.typ == nil 525 | } 526 | if p.token != token.COMMA { 527 | break 528 | } 529 | p.next() // consume the comma and continue with the next parameter 530 | } 531 | 532 | if len(list) == 0 { 533 | return // not uncommon 534 | } 535 | 536 | // distribute parameter types (len(list) > 0) 537 | if named == 0 { // no parameter has both a name and a type (names may be omitted in declarations) 538 | // all unnamed => found names are type names 539 | for i := 0; i < len(list); i++ { // e.g. func(int, string) is first parsed as names only; move each name into the type slot 540 | par := &list[i] 541 | if typ := par.name; typ != nil { 542 | par.typ = typ 543 | par.name = nil 544 | } 545 | } 546 | if tparams { // usually false; a type parameter list without names or constraints, e.g. [T, B, C], is reported as an error 547 | // This is the same error handling as below, adjusted for type parameters only. 548 | // See comment below for details. 
(go.dev/issue/64534) 549 | var errPos token.Pos 550 | var msg string 551 | if named == typed /* same as typed == 0 */ { 552 | errPos = p.pos // position error at closing ] 553 | msg = "missing type constraint" 554 | } else { 555 | errPos = pos0 // position at opening [ or first name 556 | msg = "missing type parameter name" 557 | if len(list) == 1 { 558 | msg += " or invalid array length" 559 | } 560 | } 561 | p.error(errPos, msg) 562 | } 563 | } else if named != len(list) { // e.g. grouped names such as func(a, b, c int) 564 | // some named or we're in a type parameter list => all must be named 565 | var errPos token.Pos // left-most error position (or invalid) 566 | var typ ast.Expr // current type (from right to left) 567 | for i := len(list) - 1; i >= 0; i-- { // sweep the list from right to left 568 | if par := &list[i]; par.typ != nil { // has its own type: remember it for the names to its left 569 | typ = par.typ 570 | if par.name == nil { // a type without a name 571 | errPos = typ.Position() // remember the error position 572 | n := &ast.Name{Pos: errPos, Name: "_"} 573 | par.name = n // substitute the blank name "_" 574 | } 575 | } else if typ != nil { // par.typ == nil && typ != nil 576 | par.typ = typ 577 | } else { 578 | // par.typ == nil && typ == nil => we only have a par.name 579 | errPos = par.name.Position() 580 | par.typ = &ast.BadExpr{From: errPos, To: p.pos} 581 | } 582 | } 583 | if errPos.IsValid() { // par.name == nil || typ == nil && par.typ == nil 584 | // Not all parameters are named because named != len(list). 585 | // If named == typed, there must be parameters that have no types. 586 | // They must be at the end of the parameter list, otherwise types 587 | // would have been filled in by the right-to-left sweep above and 588 | // there would be no error. 589 | // If tparams is set, the parameter list is a type parameter list. 
590 | var msg string 591 | if named == typed { 592 | errPos = p.pos // position error at closing token ) or ] 593 | if tparams { 594 | msg = "missing type constraint" 595 | } else { 596 | msg = "missing parameter type" 597 | } 598 | } else { 599 | if tparams { 600 | msg = "missing type parameter name" 601 | // go.dev/issue/60812 602 | if len(list) == 1 { 603 | msg += " or invalid array length" 604 | } 605 | } else { 606 | msg = "missing parameter name" 607 | } 608 | } 609 | p.error(errPos, msg) 610 | } 611 | } 612 | 613 | // Convert list to []*ast.Field. 614 | // If list contains types only, each type gets its own ast.Field. 615 | if named == 0 { 616 | // parameter list consists of types only 617 | for _, par := range list { // report entries that still have no type 618 | if par.typ == nil { 619 | p.error(p.pos, "nil type in unnamed parameter list") 620 | } 621 | params = append(params, &ast.Field{Type: par.typ}) 622 | } 623 | return 624 | } 625 | 626 | // If the parameter list consists of named parameters with types, 627 | // collect all names with the same types into a single ast.Field. 
628 | var names []*ast.Name 629 | var typ ast.Expr 630 | addParams := func() { 631 | if typ == nil { 632 | p.error(p.pos, "nil type in named parameter list") // this closure only runs for named parameters; the types-only case returned above 633 | } 634 | field := &ast.Field{Names: names, Type: typ} 635 | params = append(params, field) 636 | names = nil 637 | } 638 | for _, par := range list { 639 | if par.typ != typ { 640 | // group parameters: consecutive names sharing one type expression go into a single field 641 | if len(names) > 0 { // empty on the first iteration 642 | addParams() // flush the group; addParams also resets names 643 | } 644 | typ = par.typ // start tracking the new type 645 | } 646 | names = append(names, par.name) 647 | } 648 | // flush the final group, which the loop leaves pending 649 | if len(names) > 0 { 650 | addParams() 651 | } 652 | return 653 | } 654 | 655 | // parseParameters parses an optional "[...]" type parameter list followed by a parenthesized parameter list (the original notes mention receivers, parameters and results). 656 | // The type parameter list is only accepted when acceptTParams is true. 657 | func (p *parser) parseParameters(acceptTParams bool) (tparams, params *ast.FieldList) { 658 | // optional type parameter list 659 | if acceptTParams && p.token == token.LBRACK { 660 | opening := p.pos 661 | p.next() 662 | // [T any](params) syntax 663 | list := p.parseParameterList(nil, nil, token.RBRACK) 664 | rbrack := p.expect(token.RBRACK) // closing "]" 665 | tparams = &ast.FieldList{Opening: opening, List: list, Closing: rbrack} 666 | // Type parameter lists must not be empty. 
667 | if tparams.NumFields() == 0 { 668 | p.error(tparams.Closing, "empty type parameter list") 669 | tparams = nil // avoid follow-on errors 670 | } 671 | } 672 | 673 | // the ordinary parameter list starts here 674 | opening := p.expect(token.LPAREN) // opening "(" 675 | 676 | var fields []*ast.Field 677 | if p.token != token.RPAREN { // non-empty list: the next token is not ")" 678 | fields = p.parseParameterList(nil, nil, token.RPAREN) 679 | } 680 | 681 | rparen := p.expect(token.RPAREN) // consume the closing ")" 682 | params = &ast.FieldList{Opening: opening, List: fields, Closing: rparen} 683 | 684 | return // returns both named results: tparams and params 685 | } 686 | // parseResult parses a function result: a parenthesized list via parseParameters, or a single unnamed type; it returns nil when neither is present. 687 | func (p *parser) parseResult() *ast.FieldList { 688 | if p.token == token.LPAREN { 689 | _, results := p.parseParameters(false) 690 | return results 691 | } 692 | 693 | typ := p.tryIdentOrType() 694 | if typ != nil { 695 | list := make([]*ast.Field, 1) 696 | list[0] = &ast.Field{Type: typ} 697 | return &ast.FieldList{List: list} 698 | } 699 | 700 | return nil 701 | } 702 | -------------------------------------------------------------------------------- /docs/汇编语法详解.md: -------------------------------------------------------------------------------- 1 | # 汇编语言详解 2 | 3 | ## 目录 4 | 1. [汇编语言概述](#汇编语言概述) 5 | 2. [汇编语言风格介绍](#汇编语言风格介绍) 6 | 3. [AT&T 汇编语法详解](#att-汇编语法详解) 7 | 4. [Intel 汇编语法详解](#intel-汇编语法详解) 8 | 5. [Plan 9 汇编语法详解](#plan-9-汇编语法详解) 9 | 5. [三种汇编语法结构对比](#三种汇编语法结构对比) 10 | 6. [寄存器详解](#寄存器详解) 11 | 7. [指令集详解](#指令集详解) 12 | 8. [内存寻址](#内存寻址) 13 | 9. [程序结构](#程序结构) 14 | 10. [系统调用](#系统调用) 15 | 11. [实际应用示例](#实际应用示例) 16 | 17 | ## 汇编语言概述 18 | 19 | ### 什么是汇编语言 20 | 汇编语言是一种低级编程语言,它与机器语言有着一一对应的关系。汇编语言使用助记符(mnemonics)来表示机器指令,使得程序更容易编写和理解。 21 | 22 | ### 汇编语言的特点 23 | - 直接操作硬件 24 | - 执行效率高 25 | - 代码体积小 26 | - 可移植性差 27 | - 开发效率低 28 | 29 | ### 汇编语言的用途 30 | - 操作系统开发 31 | - 驱动程序开发 32 | - 嵌入式系统 33 | - 性能优化 34 | - 逆向工程 35 | 36 | ### 汇编语言风格 37 | 由于历史原因和不同厂商的实现,形成了多种不同的语法风格。主要的汇编语言风格包括: 38 | 39 | 1. 
AT&T 风格 40 | - 由 AT&T 贝尔实验室开发 41 | - 在 Unix/Linux 系统中广泛使用 42 | - 语法特点:源操作数在前,目标操作数在后 43 | - 寄存器名前加 `%`,立即数前加 `$` 44 | 45 | 2. Intel 风格 46 | - 由 Intel 公司开发 47 | - 在 Windows 和 DOS 系统中广泛使用 48 | - 语法特点:目标操作数在前,源操作数在后 49 | - 直接使用寄存器名,不使用特殊前缀 50 | 51 | 3. NASM 风格 52 | - 开源汇编器 NASM 使用的语法 53 | - 基于 Intel 风格但有所扩展 54 | - 支持更多现代特性 55 | - 跨平台兼容性好 56 | 57 | 4. MASM 风格 58 | - Microsoft 宏汇编器使用的语法 59 | - 基于 Intel 风格 60 | - 支持丰富的宏和伪指令 61 | - 主要用于 Windows 平台 62 | 63 | 5. GAS 风格 64 | - GNU 汇编器使用的语法 65 | - 基于 AT&T 风格 66 | - 在 Linux 系统中广泛使用 67 | - 支持多种架构 68 | 69 | 6. Plan 9 风格 70 | - 由贝尔实验室的 Plan 9 操作系统开发 71 | - 在 Go 语言中广泛使用 72 | - 语法特点: 73 | - 使用 `MOV` 等大写指令 74 | - 寄存器名前加 `R`(如 `R0`, `R1`) 75 | - 立即数前加 `$` 76 | - 内存引用使用 `(R0)` 形式 77 | - 优势: 78 | - 语法简洁统一 79 | - 跨平台支持好 80 | - 与 Go 语言工具链集成 81 | - 适合系统编程 82 | 83 | ### 汇编语言风格对比 84 | 85 | ```nasm 86 | # AT&T 风格 87 | movl $42, %eax 88 | addl %ebx, %eax 89 | movl (%eax), %ebx 90 | 91 | # Intel 风格 92 | mov eax, 42 93 | add eax, ebx 94 | mov ebx, [eax] 95 | 96 | # NASM 风格 97 | mov eax, 42 98 | add eax, ebx 99 | mov ebx, [eax] 100 | 101 | # MASM 风格 102 | mov eax, 42 103 | add eax, ebx 104 | mov ebx, [eax] 105 | 106 | # Plan 9 风格 107 | MOV $42, R0 108 | ADD R1, R0 109 | MOV (R0), R1 110 | ``` 111 | 112 | ### 最受欢迎的汇编语法风格 113 | 114 | #### AT&T 语法 115 | AT&T 语法是目前最受欢迎的汇编语法风格之一,主要原因包括: 116 | 117 | 1. 开源社区支持 118 | - GCC/LLVM 编译器默认使用 AT&T 语法 119 | - 大多数开源工具链支持 AT&T 语法 120 | - 在 Linux 系统上工具链完善 121 | 122 | 2. 跨平台性 123 | - 在 Unix/Linux 系统上统一使用 124 | - 支持多种 CPU 架构 125 | - 语法规则更加一致 126 | 127 | 3. 教育领域 128 | - 大多数计算机体系结构课程使用 AT&T 语法 129 | - 教材和参考资料丰富 130 | - 学习资源更容易获取 131 | 132 | 4. 工具支持 133 | - GDB 调试器默认使用 AT&T 语法 134 | - objdump 等工具支持 AT&T 语法 135 | - 开发工具链完善 136 | 137 | #### Intel 语法 138 | Intel 语法在特定领域仍然保持重要地位: 139 | 140 | 1. Windows 平台 141 | - Visual Studio 支持 Intel 语法 142 | - Windows 驱动程序开发 143 | - 系统级编程 144 | 145 | 2. 历史原因 146 | - 大量 legacy 代码使用 Intel 语法 147 | - 向后兼容性需求 148 | - 企业级应用支持 149 | 150 | 3. 
文档支持 151 | - Intel 官方文档使用 Intel 语法 152 | - 企业级应用文档 153 | - 驱动程序开发文档 154 | 155 | 156 | ## AT&T 汇编语法详解 157 | 158 | ### 基本指令 159 | 160 | 汇编语言的基本指令用于执行各种操作,如数据传输、运算、控制流等。 161 | 162 | 指令的基本格式: 163 | ``` 164 | 操作码 源操作数, 目标操作数 165 | ``` 166 | 167 | 格式说明: 168 | - 操作码:指定要执行的操作 169 | - 源操作数:提供操作的数据 170 | - 目标操作数:存储操作结果 171 | 172 | 示例: 173 | ```nasm 174 | movl $42, %eax # 立即数到寄存器 175 | movl %eax, %ebx # 寄存器到寄存器 176 | movl (%eax), %ebx # 内存到寄存器 177 | movl %eax, (%ebx) # 寄存器到内存 178 | ``` 179 | 180 | 注意:根据不同的CPU架构(x86、ARM、RISC-V等),指令的具体格式和操作数表示方式会有所不同。 181 | 182 | ### 伪指令 183 | AT&T 风格的伪指令主要用于定义段、符号和数据: 184 | 185 | 1. 段定义伪指令 186 | ```nasm 187 | .section .text # 代码段,用于存放指令机器码 188 | .section .data # 数据段,用于存放已初始化的数据 189 | .section .bss # 未初始化数据段,用于存放未初始化的数据 190 | .section .rodata # 只读数据段,用于存放常量数据 191 | ``` 192 | 193 | 2. 符号定义伪指令 194 | ```nasm 195 | .global main # 定义全局符号,使符号对其他文件可见 196 | .local local_func # 定义局部符号,仅在当前文件可见 197 | .type main, @function # 定义符号类型,@function表示这是一个函数 198 | .size main, .-main # 定义符号大小,.-main表示从当前位置到main标签的距离 199 | ``` 200 | 201 | 3. 
数据定义伪指令 202 | ```nasm 203 | .byte 42 # 定义8位数据(1字节) 204 | .word 42 # 定义16位数据(2字节) 205 | .long 42 # 定义32位数据(4字节) 206 | .quad 42 # 定义64位数据(8字节) 207 | .ascii "Hello" # 定义ASCII字符串,不以null结尾 208 | .asciz "Hello" # 定义以null结尾的ASCII字符串 209 | .align 4 # 4字节对齐,确保下一个数据从4字节边界开始 210 | ``` 211 | 212 | 这些伪指令的正确使用对于生成正确的目标文件和调试信息非常重要。它们不仅影响程序的布局,还影响链接器如何处理符号,以及调试器如何显示程序信息。 213 | 214 | ### 伪指令使用示例 215 | 216 | 下面通过一个完整的示例程序来展示各种伪指令的实际使用场景: 217 | 218 | ```nasm 219 | # 文件信息 220 | .file "example.s" # 指定源文件名,帮助调试器定位源代码 221 | 222 | # 数据段定义 223 | .section .data 224 | # 基本数据类型定义 225 | byte_val: .byte 42 # 定义一个字节的变量 226 | word_val: .word 0x1234 # 定义一个字的变量 227 | long_val: .long 0x12345678 # 定义一个双字的变量 228 | quad_val: .quad 0x1234567890ABCDEF # 定义一个四字的变量 229 | 230 | # 字符串定义 231 | str1: .ascii "Hello" # 普通ASCII字符串 232 | str2: .asciz "World" # 以null结尾的字符串 233 | 234 | # 数组定义 235 | array: .long 1, 2, 3, 4, 5 # 定义一个整数数组 236 | 237 | # 对齐示例 238 | .align 4 # 确保下一个数据从4字节边界开始 239 | aligned_data: .long 0xFFFFFFFF # 这个数据会被对齐到4字节边界 240 | 241 | # 未初始化数据段 242 | .section .bss 243 | buffer: .space 1024 # 分配1KB的缓冲区 244 | .align 8 # 8字节对齐 245 | aligned_buf: .space 64 # 分配64字节的对齐缓冲区 246 | 247 | # 只读数据段 248 | .section .rodata 249 | const_str: .string "Constant" # 定义只读字符串 250 | const_array: .long 1, 2, 3, 4 # 定义只读数组 251 | 252 | # 代码段 253 | .section .text 254 | # 全局函数定义 255 | .global main # 声明main为全局符号 256 | .type main, @function # 指定main为函数类型 257 | main: 258 | pushl %ebp # 保存旧的基址指针 259 | movl %esp, %ebp # 设置新的基址指针 260 | 261 | # 函数体 262 | movl $0, %eax # 返回值设为0 263 | 264 | movl %ebp, %esp # 恢复栈指针 265 | popl %ebp # 恢复旧的基址指针 266 | ret # 返回 267 | .size main, .-main # 计算main函数的大小 268 | 269 | # 局部函数定义 270 | .local helper_func # 声明局部函数 271 | .type helper_func, @function # 指定函数类型 272 | helper_func: 273 | # 函数实现 274 | ret 275 | .size helper_func, .-helper_func # 计算函数大小 276 | 277 | # 调试信息 278 | .section .debug_info 279 | .long 0 # 调试信息版本 280 | .string "example.s" # 源文件名 281 | .long 1 # 行号信息 282 | .long 10 # 列号信息 283 | 284 | # 
编译器信息 285 | .section .comment 286 | .string "GCC: (GNU) 9.3.0" # 编译器版本信息 287 | ``` 288 | 289 | 这个示例展示了各种伪指令的典型使用场景: 290 | 291 | 1. 段定义和属性: 292 | - 使用`.section`定义不同的段 293 | - 使用`.align`控制数据对齐 294 | - 使用`.global`和`.local`控制符号可见性 295 | 296 | 2. 数据定义: 297 | - 使用`.byte`、`.word`、`.long`、`.quad`定义不同大小的数据 298 | - 使用`.ascii`和`.asciz`定义字符串 299 | - 使用`.space`分配未初始化的空间 300 | 301 | 3. 函数定义: 302 | - 使用`.type`指定函数类型 303 | - 使用`.size`计算函数大小 304 | - 使用`.global`和`.local`控制函数可见性 305 | 306 | 4. 调试信息: 307 | - 使用`.file`指定源文件 308 | - 使用`.section .debug_info`添加调试信息 309 | - 使用`.section .comment`添加编译器信息 310 | 311 | 这些伪指令的正确使用确保了: 312 | - 数据正确对齐,提高访问效率 313 | - 符号正确导出,支持链接 314 | - 调试信息完整,便于调试 315 | - 代码结构清晰,易于维护 316 | 317 | ### 函数定义 318 | ```nasm 319 | .global main 320 | main: 321 | pushl %ebp 322 | movl %esp, %ebp 323 | # 函数体 324 | movl %ebp, %esp 325 | popl %ebp 326 | ret 327 | ``` 328 | 329 | ### 数据定义 330 | ```nasm 331 | .section .data 332 | msg: .ascii "Hello" 333 | len: .long 5 334 | array: .long 1, 2, 3, 4, 5 335 | ``` 336 | 337 | ### 注释风格 338 | ```nasm 339 | # 单行注释 340 | /* 多行注释 341 | 可以跨越多行 */ 342 | ``` 343 | 344 | ### 宏定义详解 345 | AT&T 风格的宏定义支持参数化和条件编译: 346 | 347 | 1. 基本宏定义 348 | ```nasm 349 | .macro push_reg reg 350 | pushl %\reg 351 | .endm 352 | 353 | .macro pop_reg reg 354 | popl %\reg 355 | .endm 356 | ``` 357 | 358 | 2. 带参数的宏 359 | ```nasm 360 | .macro mov_imm reg, imm 361 | movl $\imm, %\reg 362 | .endm 363 | 364 | .macro save_regs reg1, reg2, reg3 365 | pushl %\reg1 366 | pushl %\reg2 367 | pushl %\reg3 368 | .endm 369 | ``` 370 | 371 | 3. 
条件宏 372 | ```nasm 373 | .macro debug_print msg 374 | #ifdef DEBUG 375 | pushl %eax 376 | movl $\msg, %eax 377 | call print_debug 378 | popl %eax 379 | #endif 380 | .endm 381 | ``` 382 | 383 | ### 条件编译 384 | ```nasm 385 | #ifdef DEBUG 386 | movl $1, %eax 387 | #else 388 | movl $0, %eax 389 | #endif 390 | ``` 391 | 392 | 393 | ## Intel 汇编语法详解 394 | 395 | ### 基本语法规则 396 | ```nasm 397 | ; 基本格式:操作码 目标操作数, 源操作数 398 | mov eax, 42 ; 立即数到寄存器 399 | mov ebx, eax ; 寄存器到寄存器 400 | mov ebx, [eax] ; 内存到寄存器 401 | mov [ebx], eax ; 寄存器到内存 402 | ``` 403 | 404 | ### 伪指令详解 405 | Intel 风格的伪指令主要用于段定义和数据定义: 406 | 407 | 1. 段定义伪指令 408 | ```nasm 409 | section .text ; 代码段 410 | section .data ; 数据段 411 | section .bss ; 未初始化数据段 412 | section .rdata ; 只读数据段 413 | ``` 414 | 415 | 2. 符号定义伪指令 416 | ```nasm 417 | global main ; 定义全局符号 418 | extern printf ; 声明外部符号 419 | public func ; 声明公共符号 420 | ``` 421 | 422 | 3. 数据定义伪指令 423 | ```nasm 424 | db 42 ; 8位数据 425 | dw 42 ; 16位数据 426 | dd 42 ; 32位数据 427 | dq 42 ; 64位数据 428 | db "Hello", 0 ; 以null结尾的字符串 429 | times 10 db 0 ; 重复定义 430 | align 4 ; 4字节对齐 431 | ``` 432 | 433 | ### 宏定义详解 434 | Intel 风格的宏定义使用 MACRO 和 ENDM 关键字: 435 | 436 | 1. 基本宏定义 437 | ```nasm 438 | push_reg MACRO reg 439 | push reg 440 | ENDM 441 | 442 | pop_reg MACRO reg 443 | pop reg 444 | ENDM 445 | ``` 446 | 447 | 2. 带参数的宏 448 | ```nasm 449 | mov_imm MACRO reg, imm 450 | mov reg, imm 451 | ENDM 452 | 453 | save_regs MACRO reg1, reg2, reg3 454 | push reg1 455 | push reg2 456 | push reg3 457 | ENDM 458 | ``` 459 | 460 | 3. 
条件宏 461 | ```nasm 462 | debug_print MACRO msg 463 | IFDEF DEBUG 464 | push eax 465 | mov eax, msg 466 | call print_debug 467 | pop eax 468 | ENDIF 469 | ENDM 470 | ``` 471 | 472 | ### 条件编译 473 | ```nasm 474 | IFDEF DEBUG 475 | mov eax, 1 476 | ELSE 477 | mov eax, 0 478 | ENDIF 479 | ``` 480 | 481 | ### 函数定义 482 | ```nasm 483 | global main 484 | main: 485 | push ebp 486 | mov ebp, esp 487 | ; 函数体 488 | mov esp, ebp 489 | pop ebp 490 | ret 491 | ``` 492 | 493 | ### 数据定义 494 | ```nasm 495 | section .data 496 | msg: db "Hello" 497 | len: dd 5 498 | array: dd 1, 2, 3, 4, 5 499 | ``` 500 | 501 | ### 注释风格 502 | ```nasm 503 | ; 单行注释 504 | ; 多行注释 505 | ; 每行都需要分号 506 | ``` 507 | 508 | ## Plan 9 汇编语法详解 509 | 510 | ### 基本语法规则 511 | ```nasm 512 | # 基本格式:操作码 源操作数, 目标操作数 513 | MOV $42, R0 ; 立即数到寄存器 514 | MOV R0, R1 ; 寄存器到寄存器 515 | MOV (R0), R1 ; 内存到寄存器 516 | MOV R1, (R0) ; 寄存器到内存 517 | ``` 518 | 519 | ### 伪指令详解 520 | Plan 9 风格的伪指令主要用于函数和数据定义: 521 | 522 | 1. 函数定义伪指令 523 | ```nasm 524 | TEXT ·main(SB), NOSPLIT, $0 # 函数定义 525 | TEXT ·func(SB), $0-8 # 带栈帧大小的函数定义 526 | ``` 527 | 528 | 2. 数据定义伪指令 529 | ```nasm 530 | DATA ·msg(SB)/8, $"Hello" # 数据定义 531 | GLOBL ·msg(SB), RODATA, $8 # 全局符号定义 532 | ``` 533 | 534 | 3. 特殊伪指令 535 | ```nasm 536 | NOSPLIT # 表示函数不需要栈增长检查 537 | RODATA # 只读数据段 538 | NOPTR # 不包含指针的数据 539 | ``` 540 | 541 | ### 函数定义详解 542 | Plan 9 的函数定义格式非常特殊,需要详细解释: 543 | 544 | 1. 基本格式 545 | ```nasm 546 | TEXT ·main(SB), NOSPLIT, $0 547 | ``` 548 | - `TEXT`: 表示这是一个函数 549 | - `·main`: 函数名,点号表示包名 550 | - `(SB)`: 静态基址,表示这是一个静态函数 551 | - `NOSPLIT`: 表示函数不需要栈增长检查 552 | - `$0`: 栈帧大小,0表示不需要栈空间 553 | 554 | 2. 带参数的函数 555 | ```nasm 556 | TEXT ·add(SB), NOSPLIT, $0-16 557 | ``` 558 | - `$0-16`: 表示栈帧大小为0,参数总大小为16字节 559 | 560 | 3. 带局部变量的函数 561 | ```nasm 562 | TEXT ·func(SB), $16-0 563 | ``` 564 | - `$16`: 表示需要16字节的栈空间 565 | - `-0`: 表示没有参数 566 | 567 | 4. 
完整的函数示例 568 | ```nasm 569 | TEXT ·main(SB), NOSPLIT, $0 570 | MOV R29, RSP # 保存栈指针 571 | SUB $16, RSP # 分配栈空间 572 | MOV R0, 8(RSP) # 保存参数 573 | MOV R1, 16(RSP) # 保存参数 574 | # 函数体 575 | ADD $16, RSP # 恢复栈指针 576 | RET # 返回 577 | ``` 578 | 579 | ### 宏定义详解 580 | Plan 9 的宏定义使用 C 风格的预处理器指令: 581 | 582 | 1. 基本宏定义 583 | ```nasm 584 | #define PUSH(reg) MOV reg, (SP) 585 | #define POP(reg) MOV (SP), reg 586 | ``` 587 | 588 | 2. 带参数的宏 589 | ```nasm 590 | #define SAVE_REG(reg) MOV reg, -8(SP) 591 | #define RESTORE_REG(reg) MOV -8(SP), reg 592 | ``` 593 | 594 | 3. 条件宏 595 | ```nasm 596 | #ifdef DEBUG 597 | #define DEBUG_PRINT(msg) MOV $msg, R0; CALL print_debug 598 | #else 599 | #define DEBUG_PRINT(msg) 600 | #endif 601 | ``` 602 | 603 | ## 三种汇编语法结构对比 604 | 605 | ### 1. 指令格式 606 | 607 | #### AT&T 风格 608 | AT&T 风格的指令格式特点: 609 | 1. 操作数顺序:源操作数在前,目标操作数在后 610 | 2. 寄存器表示:寄存器名前加 `%` 符号 611 | 3. 立即数表示:立即数前加 `$` 符号 612 | 4. 内存引用:使用 `()` 表示内存地址 613 | 5. 操作数大小:使用后缀表示操作数大小(b/w/l/q) 614 | 615 | ```nasm 616 | # 基本格式:操作码 源操作数, 目标操作数 617 | movl $42, %eax # 立即数到寄存器 618 | movl %eax, %ebx # 寄存器到寄存器 619 | movl (%eax), %ebx # 内存到寄存器 620 | movl %eax, (%ebx) # 寄存器到内存 621 | 622 | # 操作数大小后缀 623 | movb $42, %al # 8位操作 624 | movw $42, %ax # 16位操作 625 | movl $42, %eax # 32位操作 626 | movq $42, %rax # 64位操作 627 | 628 | # 复杂内存寻址 629 | movl (%eax,%ebx,4), %ecx # 基址+变址*比例 630 | movl 8(%eax), %ebx # 带偏移的内存访问 631 | ``` 632 | 633 | #### Intel 风格 634 | Intel 风格的指令格式特点: 635 | 1. 操作数顺序:目标操作数在前,源操作数在后 636 | 2. 寄存器表示:直接使用寄存器名,不加前缀 637 | 3. 立即数表示:直接使用数值,不加前缀 638 | 4. 内存引用:使用 `[]` 表示内存地址 639 | 5. 
操作数大小:由寄存器或内存操作数类型决定 640 | 641 | ```nasm 642 | ; 基本格式:操作码 目标操作数, 源操作数 643 | mov eax, 42 ; 立即数到寄存器 644 | mov ebx, eax ; 寄存器到寄存器 645 | mov ebx, [eax] ; 内存到寄存器 646 | mov [ebx], eax ; 寄存器到内存 647 | 648 | ; 不同大小的操作 649 | mov al, 42 ; 8位操作 650 | mov ax, 42 ; 16位操作 651 | mov eax, 42 ; 32位操作 652 | mov rax, 42 ; 64位操作 653 | 654 | ; 复杂内存寻址 655 | mov ecx, [eax+ebx*4] ; 基址+变址*比例 656 | mov ebx, [eax+8] ; 带偏移的内存访问 657 | ``` 658 | 659 | #### Plan 9 风格 660 | Plan 9 风格的指令格式特点: 661 | 1. 操作数顺序:源操作数在前,目标操作数在后 662 | 2. 寄存器表示:使用 `R` 前缀(如 R0, R1) 663 | 3. 立即数表示:立即数前加 `$` 符号 664 | 4. 内存引用:使用 `()` 表示内存地址 665 | 5. 指令大写:所有指令都使用大写形式 666 | 667 | ```nasm 668 | # 基本格式:操作码 源操作数, 目标操作数 669 | MOV $42, R0 ; 立即数到寄存器 670 | MOV R0, R1 ; 寄存器到寄存器 671 | MOV (R0), R1 ; 内存到寄存器 672 | MOV R1, (R0) ; 寄存器到内存 673 | 674 | # 不同大小的操作 675 | MOVB $42, R0 ; 8位操作 676 | MOVW $42, R0 ; 16位操作 677 | MOVL $42, R0 ; 32位操作 678 | MOVQ $42, R0 ; 64位操作 679 | 680 | # 复杂内存寻址 681 | MOV (R0)(R1*4), R2 ; 基址+变址*比例 682 | MOV 8(R0), R1 ; 带偏移的内存访问 683 | ``` 684 | 685 | ### 2. 伪指令 686 | 687 | #### AT&T 风格 688 | ```nasm 689 | .section .text # 代码段 690 | .section .data # 数据段 691 | .global main # 全局符号 692 | .long 42 # 32位整数 693 | .ascii "Hello" # ASCII字符串 694 | .asciz "Hello" # 以null结尾的字符串 695 | ``` 696 | 697 | #### Intel 风格 698 | ```nasm 699 | section .text ; 代码段 700 | section .data ; 数据段 701 | global main ; 全局符号 702 | dd 42 ; 32位整数 703 | db "Hello" ; ASCII字符串 704 | db "Hello", 0 ; 以null结尾的字符串 705 | ``` 706 | 707 | #### Plan 9 风格 708 | ```nasm 709 | TEXT ·main(SB), NOSPLIT, $0 ; 函数定义 710 | DATA ·msg(SB)/8, $"Hello" ; 数据定义 711 | GLOBL ·msg(SB), RODATA, $8 ; 全局符号 712 | ``` 713 | 714 | ### 3. 
宏定义 715 | 716 | #### AT&T 风格 717 | ```nasm 718 | .macro push_reg reg 719 | pushl %\reg 720 | .endm 721 | 722 | .macro pop_reg reg 723 | popl %\reg 724 | .endm 725 | ``` 726 | 727 | #### Intel 风格 728 | ```nasm 729 | push_reg MACRO reg 730 | push reg 731 | ENDM 732 | 733 | pop_reg MACRO reg 734 | pop reg 735 | ENDM 736 | ``` 737 | 738 | #### Plan 9 风格 739 | ```nasm 740 | #define PUSH(reg) MOV reg, (SP) 741 | #define POP(reg) MOV (SP), reg 742 | ``` 743 | 744 | ### 4. 条件编译 745 | 746 | #### AT&T 风格 747 | ```nasm 748 | #ifdef DEBUG 749 | movl $1, %eax 750 | #else 751 | movl $0, %eax 752 | #endif 753 | ``` 754 | 755 | #### Intel 风格 756 | ```nasm 757 | IFDEF DEBUG 758 | mov eax, 1 759 | ELSE 760 | mov eax, 0 761 | ENDIF 762 | ``` 763 | 764 | #### Plan 9 风格 765 | ```nasm 766 | #ifdef DEBUG 767 | MOV $1, R0 768 | #else 769 | MOV $0, R0 770 | #endif 771 | ``` 772 | 773 | ### 5. 函数定义 774 | 775 | #### AT&T 风格 776 | ```nasm 777 | .global main 778 | main: 779 | pushl %ebp 780 | movl %esp, %ebp 781 | # 函数体 782 | movl %ebp, %esp 783 | popl %ebp 784 | ret 785 | ``` 786 | 787 | #### Intel 风格 788 | ```nasm 789 | global main 790 | main: 791 | push ebp 792 | mov ebp, esp 793 | ; 函数体 794 | mov esp, ebp 795 | pop ebp 796 | ret 797 | ``` 798 | 799 | #### Plan 9 风格 800 | ```nasm 801 | TEXT ·main(SB), NOSPLIT, $0 802 | MOV R29, RSP 803 | # 函数体 804 | RET 805 | ``` 806 | 807 | ### 6. 数据定义 808 | 809 | #### AT&T 风格 810 | ```nasm 811 | .section .data 812 | msg: .ascii "Hello" 813 | len: .long 5 814 | array: .long 1, 2, 3, 4, 5 815 | ``` 816 | 817 | #### Intel 风格 818 | ```nasm 819 | section .data 820 | msg: db "Hello" 821 | len: dd 5 822 | array: dd 1, 2, 3, 4, 5 823 | ``` 824 | 825 | #### Plan 9 风格 826 | ```nasm 827 | DATA ·msg(SB)/8, $"Hello" 828 | DATA ·len(SB)/4, $5 829 | DATA ·array(SB)/20, $1, $2, $3, $4, $5 830 | ``` 831 | 832 | ### 7. 
注释风格 833 | 834 | #### AT&T 风格 835 | ```nasm 836 | # 单行注释 837 | /* 多行注释 838 | 可以跨越多行 */ 839 | ``` 840 | 841 | #### Intel 风格 842 | ```nasm 843 | ; 单行注释 844 | ; 多行注释 845 | ; 每行都需要分号 846 | ``` 847 | 848 | #### Plan 9 风格 849 | ```nasm 850 | # 单行注释 851 | /* 多行注释 852 | 可以跨越多行 */ 853 | ``` 854 | 855 | ## 寄存器详解 856 | 857 | ### 通用寄存器 858 | - `eax`: 累加器 859 | - `ebx`: 基址寄存器 860 | - `ecx`: 计数器 861 | - `edx`: 数据寄存器 862 | 863 | ### 特殊寄存器 864 | - `eip`: 指令指针 865 | - `esp`: 栈指针 866 | - `ebp`: 基址指针 867 | - `eflags`: 标志寄存器 868 | 869 | ### 标志位 870 | - `CF`: 进位标志 871 | - `ZF`: 零标志 872 | - `SF`: 符号标志 873 | - `OF`: 溢出标志 874 | 875 | ## 指令集详解 876 | 877 | ### 数据传输指令 878 | ```nasm 879 | mov # 移动数据 880 | push # 压栈 881 | pop # 出栈 882 | lea # 加载有效地址 883 | ``` 884 | 885 | ### 算术运算指令 886 | ```nasm 887 | add # 加法 888 | sub # 减法 889 | mul # 乘法 890 | div # 除法 891 | ``` 892 | 893 | ### 逻辑运算指令 894 | ```nasm 895 | and # 与 896 | or # 或 897 | xor # 异或 898 | not # 非 899 | ``` 900 | 901 | ### 移位指令 902 | ```nasm 903 | shl # 左移 904 | shr # 逻辑右移 905 | sar # 算术右移 906 | ``` 907 | 908 | ### 条件跳转指令 909 | ```nasm 910 | je # 相等跳转 911 | jne # 不相等跳转 912 | jg # 大于跳转 913 | jl # 小于跳转 914 | ``` 915 | 916 | ## 内存寻址 917 | 918 | ### 寻址模式 919 | 1. 立即寻址 920 | 2. 寄存器寻址 921 | 3. 直接寻址 922 | 4. 寄存器间接寻址 923 | 5. 基址寻址 924 | 6. 变址寻址 925 | 7. 
基址变址寻址 926 | 927 | ### 内存对齐 928 | - 数据对齐原则 929 | - 对齐指令 930 | - 性能影响 931 | 932 | ## 程序结构 933 | 934 | ### 基本结构 935 | ```nasm 936 | .section .data 937 | ; 数据段 938 | 939 | .section .text 940 | .global main 941 | main: 942 | ; 代码段 943 | ``` 944 | 945 | ### 函数调用 946 | ```nasm 947 | # 函数调用约定 948 | push %ebp 949 | mov %esp, %ebp 950 | # 函数体 951 | mov %ebp, %esp 952 | pop %ebp 953 | ret 954 | ``` 955 | 956 | ### 栈操作 957 | - 栈帧结构 958 | - 参数传递 959 | - 局部变量 960 | 961 | ## 系统调用 962 | 963 | ### Linux 系统调用 964 | ```nasm 965 | # 32位系统调用 966 | mov $1, %eax # 系统调用号 967 | mov $1, %ebx # 参数1 968 | int $0x80 # 触发系统调用 969 | 970 | # 64位系统调用 971 | mov $1, %rax # 系统调用号 972 | mov $1, %rdi # 参数1 973 | syscall # 触发系统调用 974 | ``` 975 | 976 | ### 常用系统调用 977 | - 文件操作 978 | - 进程控制 979 | - 内存管理 980 | - 网络通信 981 | 982 | ## 实际应用示例 983 | 984 | ### Hello World 程序 985 | ```nasm 986 | .section .data 987 | msg: .ascii "Hello, World!\n" 988 | len: .equ $ - msg 989 | 990 | .section .text 991 | .global main 992 | main: 993 | # 写入系统调用 994 | mov $4, %eax # sys_write 995 | mov $1, %ebx # stdout 996 | mov $msg, %ecx # 消息地址 997 | mov $len, %edx # 消息长度 998 | int $0x80 999 | 1000 | # 退出系统调用 1001 | mov $1, %eax # sys_exit 1002 | xor %ebx, %ebx # 返回码 0 1003 | int $0x80 1004 | ``` 1005 | 1006 | ### 简单计算器 1007 | ```nasm 1008 | .section .data 1009 | num1: .long 10 1010 | num2: .long 20 1011 | result: .long 0 1012 | 1013 | .section .text 1014 | .global main 1015 | main: 1016 | mov num1, %eax 1017 | add num2, %eax 1018 | mov %eax, result 1019 | ret 1020 | ``` 1021 | 1022 | ## 调试技巧 1023 | 1024 | ### 常用调试工具 1025 | - GDB 1026 | - objdump 1027 | - strace 1028 | - ltrace 1029 | 1030 | ### 调试方法 1031 | - 断点设置 1032 | - 单步执行 1033 | - 寄存器查看 1034 | - 内存查看 1035 | 1036 | ## 性能优化 1037 | 1038 | ### 优化技巧 1039 | - 指令选择 1040 | - 寄存器使用 1041 | - 内存访问 1042 | - 分支预测 1043 | 1044 | ### 常见陷阱 1045 | - 指令延迟 1046 | - 缓存效应 1047 | - 流水线停顿 1048 | - 分支预测失败 1049 | 1050 | ## 总结 1051 | 1052 | 汇编语言虽然复杂,但掌握它对于理解计算机底层原理和进行系统级编程非常重要。通过本文的学习,您应该能够: 1053 | 1. 
理解汇编语言的基本概念 1054 | 2. 掌握 AT&T、Intel 和 Plan 9 三种语法 1055 | 3. 编写简单的汇编程序 1056 | 4. 进行基本的调试和优化 1057 | 1058 | ## 参考资料 1059 | 1. Intel 64 and IA-32 Architectures Software Developer's Manual 1060 | 2. AT&T Assembly Language Reference 1061 | 3. Linux System Call Table 1062 | 4. x86 Assembly Language Reference Manual -------------------------------------------------------------------------------- /compiler/compile/token/file.go: -------------------------------------------------------------------------------- 1 | package token 2 | 3 | import ( 4 | "cmp" 5 | "fmt" 6 | "slices" 7 | "strconv" 8 | "sync" 9 | "sync/atomic" 10 | ) 11 | 12 | // ----------------------------------------------------------------------------- 13 | // Positions 14 | 15 | // Position describes an arbitrary source position 16 | // including the file, line, and column location. 17 | // A Position is valid if the line number is > 0. 18 | type Position struct { 19 | Filename string // filename, if any 20 | Offset int // offset, starting at 0 21 | Line int // line number, starting at 1 22 | Column int // column number, starting at 1 (byte count) 23 | } 24 | 25 | // IsValid reports whether the position is valid. 
26 | func (pos *Position) IsValid() bool { return pos.Line > 0 } 27 | 28 | // String returns a string in one of several forms: 29 | // 30 | // file:line:column valid position with file name 31 | // file:line valid position with file name but no column (column == 0) 32 | // line:column valid position without file name 33 | // line valid position without file name and no column (column == 0) 34 | // file invalid position with file name 35 | // - invalid position without file name 36 | func (pos Position) String() string { 37 | s := pos.Filename 38 | if pos.IsValid() { // only a valid position (Line > 0) contributes line[:column] 39 | if s != "" { 40 | s += ":" 41 | } 42 | s += strconv.Itoa(pos.Line) 43 | if pos.Column != 0 { // column 0 means "no column information" 44 | s += fmt.Sprintf(":%d", pos.Column) 45 | } 46 | } 47 | if s == "" { // no file name and no valid line 48 | s = "-" 49 | } 50 | return s 51 | } 52 | 53 | // Pos is a compact encoding of a source position within a file set. 54 | // It can be converted into a [Position] for a more convenient, but much 55 | // larger, representation. 56 | // 57 | // The Pos value for a given file is a number in the range [base, base+size], 58 | // where base and size are specified when a file is added to the file set. 59 | // The difference between a Pos value and the corresponding file base 60 | // corresponds to the byte offset of that position (represented by the Pos value) 61 | // from the beginning of the file. Thus, the file base offset is the Pos value 62 | // representing the first byte in the file. 63 | // 64 | // To create the Pos value for a specific source offset (measured in bytes), 65 | // first add the respective file to the current file set using [FileSet.AddFile] 66 | // and then call [File.Pos](offset) for that file. Given a Pos value p 67 | // for a specific file set fset, the corresponding [Position] value is 68 | // obtained by calling fset.Position(p). 
69 | // 70 | // Pos values can be compared directly with the usual comparison operators: 71 | // If two Pos values p and q are in the same file, comparing p and q is 72 | // equivalent to comparing the respective source file offsets. If p and q 73 | // are in different files, p < q is true if the file implied by p was added 74 | // to the respective file set before the file implied by q. 75 | type Pos int 76 | 77 | // The zero value for [Pos] is NoPos; there is no file and line information 78 | // associated with it, and NoPos.IsValid() is false. NoPos is always 79 | // smaller than any other [Pos] value. The corresponding [Position] value 80 | // for NoPos is the zero value for [Position]. 81 | const NoPos Pos = 0 82 | 83 | // IsValid reports whether the position is valid. 84 | func (p Pos) IsValid() bool { 85 | return p != NoPos 86 | } 87 | 88 | // ----------------------------------------------------------------------------- 89 | // File 90 | 91 | // A File is a handle for a file belonging to a [FileSet]. 92 | // A File has a name, size, and line offset table. 93 | type File struct { 94 | name string // file name as provided to AddFile 95 | base int // Pos value range for this file is [base...base+size] 96 | size int // file size as provided to AddFile 97 | 98 | // lines and infos are protected by mutex 99 | mutex sync.Mutex 100 | lines []int // lines contains the offset of the first character for each line (the first entry is always 0) 101 | infos []lineInfo 102 | } 103 | 104 | // Name returns the file name of file f as registered with AddFile. 105 | func (f *File) Name() string { 106 | return f.name 107 | } 108 | 109 | // Base returns the base offset of file f as registered with AddFile. 110 | func (f *File) Base() int { 111 | return f.base 112 | } 113 | 114 | // Size returns the size of file f as registered with AddFile. 115 | func (f *File) Size() int { 116 | return f.size 117 | } 118 | 119 | // LineCount returns the number of lines in file f. 
120 | func (f *File) LineCount() int { 121 | f.mutex.Lock() 122 | n := len(f.lines) 123 | f.mutex.Unlock() 124 | return n 125 | } 126 | 127 | // AddLine adds the line offset for a new line. 128 | // The line offset must be larger than the offset for the previous line 129 | // and smaller than the file size; otherwise the line offset is ignored. 130 | func (f *File) AddLine(offset int) { 131 | f.mutex.Lock() 132 | if i := len(f.lines); (i == 0 || f.lines[i-1] < offset) && offset < f.size { 133 | f.lines = append(f.lines, offset) 134 | } 135 | f.mutex.Unlock() 136 | } 137 | 138 | // MergeLine merges a line with the following line. It is akin to replacing 139 | // the newline character at the end of the line with a space (to not change the 140 | // remaining offsets). To obtain the line number, consult e.g. [Position.Line]. 141 | // MergeLine will panic if given an invalid line number. 142 | func (f *File) MergeLine(line int) { 143 | if line < 1 { 144 | panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line)) 145 | } 146 | f.mutex.Lock() 147 | defer f.mutex.Unlock() 148 | if line >= len(f.lines) { 149 | panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines))) 150 | } 151 | // To merge the line numbered line with the line numbered line+1, 152 | // we need to remove the entry in lines corresponding to the line 153 | // numbered line+1. The entry in lines corresponding to the line 154 | // numbered line+1 is located at index line, since indices in lines 155 | // are 0-based and line numbers are 1-based. 156 | copy(f.lines[line:], f.lines[line+1:]) 157 | f.lines = f.lines[:len(f.lines)-1] 158 | } 159 | 160 | // Lines returns the effective line offset table of the form described by [File.SetLines]. 161 | // Callers must not mutate the result. 162 | func (f *File) Lines() []int { 163 | f.mutex.Lock() 164 | lines := f.lines 165 | f.mutex.Unlock() 166 | return lines 167 | } 168 | 169 | // SetLines sets the line offsets for a file and reports whether it succeeded. 
170 | // The line offsets are the offsets of the first character of each line; 171 | // for instance for the content "ab\nc\n" the line offsets are {0, 3}. 172 | // An empty file has an empty line offset table. 173 | // Each line offset must be larger than the offset for the previous line 174 | // and smaller than the file size; otherwise SetLines fails and returns 175 | // false. 176 | // Callers must not mutate the provided slice after SetLines returns. 177 | func (f *File) SetLines(lines []int) bool { 178 | // verify validity of lines table 179 | size := f.size 180 | for i, offset := range lines { 181 | if i > 0 && offset <= lines[i-1] || size <= offset { 182 | return false 183 | } 184 | } 185 | 186 | // set lines table 187 | f.mutex.Lock() 188 | f.lines = lines 189 | f.mutex.Unlock() 190 | return true 191 | } 192 | 193 | // SetLinesForContent sets the line offsets for the given file content. 194 | // It ignores position-altering //line comments. 195 | func (f *File) SetLinesForContent(content []byte) { 196 | var lines []int 197 | line := 0 // start offset of a line that has not been recorded yet; -1 when none is pending 198 | for offset, b := range content { 199 | if line >= 0 { // a byte follows a pending line start: record that line 200 | lines = append(lines, line) 201 | } 202 | line = -1 203 | if b == '\n' { // the next line, if any byte follows, starts right after the newline 204 | line = offset + 1 205 | } 206 | } 207 | 208 | // set lines table 209 | f.mutex.Lock() 210 | f.lines = lines 211 | f.mutex.Unlock() 212 | } 213 | 214 | // LineStart returns the [Pos] value of the start of the specified line. 215 | // It ignores any alternative positions set using [File.AddLineColumnInfo]. 216 | // LineStart panics if the 1-based line number is invalid. 
217 | func (f *File) LineStart(line int) Pos { 218 | if line < 1 { 219 | panic(fmt.Sprintf("invalid line number %d (should be >= 1)", line)) 220 | } 221 | f.mutex.Lock() 222 | defer f.mutex.Unlock() 223 | if line > len(f.lines) { 224 | panic(fmt.Sprintf("invalid line number %d (should be < %d)", line, len(f.lines))) 225 | } 226 | return Pos(f.base + f.lines[line-1]) 227 | } 228 | 229 | // A lineInfo object describes alternative file, line, and column 230 | // number information (such as provided via a //line directive) 231 | // for a given file offset. 232 | type lineInfo struct { 233 | // fields are exported to make them accessible to gob 234 | Offset int 235 | Filename string 236 | Line, Column int 237 | } 238 | 239 | // AddLineInfo is like [File.AddLineColumnInfo] with a column = 1 argument. 240 | // It is here for backward-compatibility for code prior to Go 1.11. 241 | func (f *File) AddLineInfo(offset int, filename string, line int) { 242 | f.AddLineColumnInfo(offset, filename, line, 1) 243 | } 244 | 245 | // AddLineColumnInfo adds alternative file, line, and column number 246 | // information for a given file offset. The offset must be larger 247 | // than the offset for the previously added alternative line info 248 | // and smaller than the file size; otherwise the information is 249 | // ignored. 250 | // 251 | // AddLineColumnInfo is typically used to register alternative position 252 | // information for line directives such as //line filename:line:column. 253 | func (f *File) AddLineColumnInfo(offset int, filename string, line, column int) { 254 | f.mutex.Lock() 255 | if i := len(f.infos); (i == 0 || f.infos[i-1].Offset < offset) && offset < f.size { 256 | f.infos = append(f.infos, lineInfo{offset, filename, line, column}) 257 | } 258 | f.mutex.Unlock() 259 | } 260 | 261 | // fixOffset fixes an out-of-bounds offset such that 0 <= offset <= f.size. 
262 | func (f *File) fixOffset(offset int) int { 263 | switch { 264 | case offset < 0: 265 | if !debug { 266 | return 0 267 | } 268 | case offset > f.size: 269 | if !debug { 270 | return f.size 271 | } 272 | default: 273 | return offset 274 | } 275 | 276 | // only generate this code if needed 277 | if debug { 278 | panic(fmt.Sprintf("offset %d out of bounds [%d, %d] (position %d out of bounds [%d, %d])", 279 | 0 /* for symmetry */, offset, f.size, 280 | f.base+offset, f.base, f.base+f.size)) 281 | } 282 | return 0 283 | } 284 | 285 | // Pos returns the Pos value for the given file offset. 286 | // 287 | // If offset is negative, the result is the file's start 288 | // position; if the offset is too large, the result is 289 | // the file's end position (see also go.dev/issue/57490). 290 | // 291 | // The following invariant, though not true for Pos values 292 | // in general, holds for the result p: 293 | // f.Pos(f.Offset(p)) == p. 294 | func (f *File) Pos(offset int) Pos { 295 | return Pos(f.base + f.fixOffset(offset)) 296 | } 297 | 298 | // Offset returns the offset for the given file position p. 299 | // 300 | // If p is before the file's start position (or if p is NoPos), 301 | // the result is 0; if p is past the file's end position, 302 | // the result is the file size (see also go.dev/issue/57490). 303 | // 304 | // The following invariant, though not true for offset values 305 | // in general, holds for the result offset: 306 | // f.Offset(f.Pos(offset)) == offset 307 | func (f *File) Offset(p Pos) int { 308 | return f.fixOffset(int(p) - f.base) 309 | } 310 | 311 | // Line returns the line number for the given file position p; 312 | // p must be a [Pos] value in that file or [NoPos]. 
313 | func (f *File) Line(p Pos) int { 314 | return f.Position(p).Line 315 | } 316 | 317 | func searchLineInfos(a []lineInfo, x int) int { 318 | i, found := slices.BinarySearchFunc(a, x, func(a lineInfo, x int) int { 319 | return cmp.Compare(a.Offset, x) 320 | }) 321 | if !found { 322 | // We want the lineInfo containing x, but if we didn't 323 | // find x then i is the next one. 324 | i-- 325 | } 326 | return i 327 | } 328 | 329 | // unpack returns the filename and line and column number for a file offset. 330 | // If adjusted is set, unpack will return the filename and line information 331 | // possibly adjusted by //line comments; otherwise those comments are ignored. 332 | func (f *File) unpack(offset int, adjusted bool) (filename string, line, column int) { 333 | f.mutex.Lock() 334 | filename = f.name 335 | if i := searchInts(f.lines, offset); i >= 0 { 336 | line, column = i+1, offset-f.lines[i]+1 337 | } 338 | if adjusted && len(f.infos) > 0 { 339 | // few files have extra line infos 340 | if i := searchLineInfos(f.infos, offset); i >= 0 { 341 | alt := &f.infos[i] 342 | filename = alt.Filename 343 | if i := searchInts(f.lines, alt.Offset); i >= 0 { 344 | // i+1 is the line at which the alternative position was recorded 345 | d := line - (i + 1) // line distance from alternative position base 346 | line = alt.Line + d 347 | if alt.Column == 0 { 348 | // alternative column is unknown => relative column is unknown 349 | // (the current specification for line directives requires 350 | // this to apply until the next PosBase/line directive, 351 | // not just until the new newline) 352 | column = 0 353 | } else if d == 0 { 354 | // the alternative position base is on the current line 355 | // => column is relative to alternative column 356 | column = alt.Column + (offset - alt.Offset) 357 | } 358 | } 359 | } 360 | } 361 | // TODO(mvdan): move Unlock back under Lock with a defer statement once 362 | // https://go.dev/issue/38471 is fixed to remove the performance 
penalty. 363 | f.mutex.Unlock() 364 | return 365 | } 366 | 367 | func (f *File) position(p Pos, adjusted bool) (pos Position) { 368 | offset := f.fixOffset(int(p) - f.base) 369 | pos.Offset = offset 370 | pos.Filename, pos.Line, pos.Column = f.unpack(offset, adjusted) 371 | return 372 | } 373 | 374 | // PositionFor returns the Position value for the given file position p. 375 | // If p is out of bounds, it is adjusted to match the File.Offset behavior. 376 | // If adjusted is set, the position may be adjusted by position-altering 377 | // //line comments; otherwise those comments are ignored. 378 | // p must be a Pos value in f or NoPos. 379 | func (f *File) PositionFor(p Pos, adjusted bool) (pos Position) { 380 | if p != NoPos { 381 | pos = f.position(p, adjusted) 382 | } 383 | return 384 | } 385 | 386 | // Position returns the Position value for the given file position p. 387 | // If p is out of bounds, it is adjusted to match the File.Offset behavior. 388 | // Calling f.Position(p) is equivalent to calling f.PositionFor(p, true). 389 | func (f *File) Position(p Pos) (pos Position) { 390 | return f.PositionFor(p, true) 391 | } 392 | 393 | // ----------------------------------------------------------------------------- 394 | // FileSet 395 | 396 | // A FileSet represents a set of source files. 397 | // Methods of file sets are synchronized; multiple goroutines 398 | // may invoke them concurrently. 399 | // 400 | // The byte offsets for each file in a file set are mapped into 401 | // distinct (integer) intervals, one interval [base, base+size] 402 | // per file. [FileSet.Base] represents the first byte in the file, and size 403 | // is the corresponding file size. A [Pos] value is a value in such 404 | // an interval. By determining the interval a [Pos] value belongs 405 | // to, the file, its file base, and thus the byte offset (position) 406 | // the [Pos] value is representing can be computed. 
407 | // 408 | // When adding a new file, a file base must be provided. That can 409 | // be any integer value that is past the end of any interval of any 410 | // file already in the file set. For convenience, [FileSet.Base] provides 411 | // such a value, which is simply the end of the Pos interval of the most 412 | // recently added file, plus one. Unless there is a need to extend an 413 | // interval later, using the [FileSet.Base] should be used as argument 414 | // for [FileSet.AddFile]. 415 | // 416 | // A [File] may be removed from a FileSet when it is no longer needed. 417 | // This may reduce memory usage in a long-running application. 418 | type FileSet struct { 419 | mutex sync.RWMutex // protects the file set 420 | base int // base offset for the next file 421 | files []*File // list of files in the order added to the set 422 | last atomic.Pointer[File] // cache of last file looked up 423 | } 424 | 425 | // NewFileSet creates a new file set. 426 | func NewFileSet() *FileSet { 427 | return &FileSet{ 428 | base: 1, // 0 == NoPos 429 | } 430 | } 431 | 432 | // Base returns the minimum base offset that must be provided to 433 | // [FileSet.AddFile] when adding the next file. 434 | func (s *FileSet) Base() int { 435 | s.mutex.RLock() 436 | b := s.base 437 | s.mutex.RUnlock() 438 | return b 439 | } 440 | 441 | // AddFile adds a new file with a given filename, base offset, and file size 442 | // to the file set s and returns the file. Multiple files may have the same 443 | // name. The base offset must not be smaller than the [FileSet.Base], and 444 | // size must not be negative. As a special case, if a negative base is provided, 445 | // the current value of the [FileSet.Base] is used instead. 446 | // 447 | // Adding the file will set the file set's [FileSet.Base] value to base + size + 1 448 | // as the minimum base value for the next file. 
The following relationship 449 | // exists between a [Pos] value p for a given file offset offs: 450 | // 451 | // int(p) = base + offs 452 | // 453 | // with offs in the range [0, size] and thus p in the range [base, base+size]. 454 | // For convenience, [File.Pos] may be used to create file-specific position 455 | // values from a file offset. 456 | func (s *FileSet) AddFile(filename string, base, size int) *File { 457 | // Allocate f outside the critical section. 458 | f := &File{name: filename, size: size, lines: []int{0}} 459 | 460 | s.mutex.Lock() 461 | defer s.mutex.Unlock() 462 | if base < 0 { 463 | base = s.base 464 | } 465 | if base < s.base { 466 | panic(fmt.Sprintf("invalid base %d (should be >= %d)", base, s.base)) 467 | } 468 | f.base = base 469 | if size < 0 { 470 | panic(fmt.Sprintf("invalid size %d (should be >= 0)", size)) 471 | } 472 | // base >= s.base && size >= 0 473 | base += size + 1 // +1 because EOF also has a position 474 | if base < 0 { 475 | panic("token.Pos offset overflow (> 2G of source code in file set)") 476 | } 477 | // add the file to the file set 478 | s.base = base 479 | s.files = append(s.files, f) 480 | s.last.Store(f) 481 | return f 482 | } 483 | 484 | // RemoveFile removes a file from the [FileSet] so that subsequent 485 | // queries for its [Pos] interval yield a negative result. 486 | // This reduces the memory usage of a long-lived [FileSet] that 487 | // encounters an unbounded stream of files. 488 | // 489 | // Removing a file that does not belong to the set has no effect. 
490 | func (s *FileSet) RemoveFile(file *File) { 491 | s.last.CompareAndSwap(file, nil) // clear last file cache 492 | 493 | s.mutex.Lock() 494 | defer s.mutex.Unlock() 495 | 496 | if i := searchFiles(s.files, file.base); i >= 0 && s.files[i] == file { 497 | last := &s.files[len(s.files)-1] 498 | s.files = slices.Delete(s.files, i, i+1) 499 | *last = nil // don't prolong lifetime when popping last element 500 | } 501 | } 502 | 503 | // Iterate calls f for the files in the file set in the order they were added 504 | // until f returns false. 505 | func (s *FileSet) Iterate(f func(*File) bool) { 506 | for i := 0; ; i++ { 507 | var file *File 508 | s.mutex.RLock() 509 | if i < len(s.files) { 510 | file = s.files[i] 511 | } 512 | s.mutex.RUnlock() 513 | if file == nil || !f(file) { 514 | break 515 | } 516 | } 517 | } 518 | 519 | func searchFiles(a []*File, x int) int { 520 | i, found := slices.BinarySearchFunc(a, x, func(a *File, x int) int { 521 | return cmp.Compare(a.base, x) 522 | }) 523 | if !found { 524 | // We want the File containing x, but if we didn't 525 | // find x then i is the next one. 526 | i-- 527 | } 528 | return i 529 | } 530 | 531 | func (s *FileSet) file(p Pos) *File { 532 | // common case: p is in last file. 533 | if f := s.last.Load(); f != nil && f.base <= int(p) && int(p) <= f.base+f.size { 534 | return f 535 | } 536 | 537 | s.mutex.RLock() 538 | defer s.mutex.RUnlock() 539 | 540 | // p is not in last file - search all files 541 | if i := searchFiles(s.files, int(p)); i >= 0 { 542 | f := s.files[i] 543 | // f.base <= int(p) by definition of searchFiles 544 | if int(p) <= f.base+f.size { 545 | // Update cache of last file. A race is ok, 546 | // but an exclusive lock causes heavy contention. 547 | s.last.Store(f) 548 | return f 549 | } 550 | } 551 | return nil 552 | } 553 | 554 | // File returns the file that contains the position p. 555 | // If no such file is found (for instance for p == [NoPos]), 556 | // the result is nil. 
557 | func (s *FileSet) File(p Pos) (f *File) { 558 | if p != NoPos { 559 | f = s.file(p) 560 | } 561 | return 562 | } 563 | 564 | // PositionFor converts a [Pos] p in the fileset into a [Position] value. 565 | // If adjusted is set, the position may be adjusted by position-altering 566 | // //line comments; otherwise those comments are ignored. 567 | // p must be a [Pos] value in s or [NoPos]. 568 | func (s *FileSet) PositionFor(p Pos, adjusted bool) (pos Position) { 569 | if p != NoPos { 570 | if f := s.file(p); f != nil { 571 | return f.position(p, adjusted) 572 | } 573 | } 574 | return 575 | } 576 | 577 | // Position converts a [Pos] p in the fileset into a Position value. 578 | // Calling s.Position(p) is equivalent to calling s.PositionFor(p, true). 579 | func (s *FileSet) Position(p Pos) (pos Position) { 580 | return s.PositionFor(p, true) 581 | } 582 | 583 | // ----------------------------------------------------------------------------- 584 | // Helper functions 585 | 586 | func searchInts(a []int, x int) int { 587 | // This function body is a manually inlined version of: 588 | // 589 | // return sort.Search(len(a), func(i int) bool { return a[i] > x }) - 1 590 | // 591 | // With better compiler optimizations, this may not be needed in the 592 | // future, but at the moment this change improves the go/printer 593 | // benchmark performance by ~30%. This has a direct impact on the 594 | // speed of gofmt and thus seems worthwhile (2011-04-29). 595 | // TODO(gri): Remove this when compilers have caught up. 596 | i, j := 0, len(a) 597 | for i < j { 598 | h := int(uint(i+j) >> 1) // avoid overflow when computing h 599 | // i ≤ h < j 600 | if a[h] <= x { 601 | i = h + 1 602 | } else { 603 | j = h 604 | } 605 | } 606 | return i - 1 607 | } 608 | --------------------------------------------------------------------------------