├── .gdbinit ├── .gitignore ├── .hgignore ├── COPYING ├── Makefile ├── README ├── debug ├── fstools └── mkfs │ ├── Makefile │ └── mkfs.c ├── include └── kdef.h ├── mkdiskimg ├── run └── src ├── asmops.h ├── ata.c ├── ata.h ├── bdev.c ├── bdev.h ├── boot ├── mboot.S └── mboot.h ├── config.h ├── desc.h ├── file.h ├── fs.c ├── fs.h ├── fs_sys.c ├── interrupts.h ├── intr-asm.S ├── intr.c ├── intr.h ├── klibc ├── assert.h ├── ctype.c ├── ctype.h ├── errno.h ├── inttypes.h ├── malloc.c ├── stdarg.h ├── stdio.c ├── stdio.h ├── stdlib.c ├── stdlib.h ├── string.c ├── string.h ├── time.c └── time.h ├── main.c ├── mem.c ├── mem.h ├── mutex.c ├── mutex.h ├── panic.c ├── panic.h ├── part.c ├── part.h ├── proc-asm.S ├── proc.c ├── proc.h ├── rbtree.c ├── rbtree.h ├── regs.S ├── rtc.c ├── rtc.h ├── sched.c ├── sched.h ├── segm-asm.S ├── segm.c ├── segm.h ├── syscall.c ├── syscall.h ├── term.c ├── term.h ├── test_proc.S ├── timer.c ├── timer.h ├── tss.h ├── vid.c ├── vid.h ├── vm-asm.S ├── vm.c └── vm.h /.gdbinit: -------------------------------------------------------------------------------- 1 | file kernel.elf 2 | target remote localhost:1234 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.d 2 | *.o 3 | *.swp 4 | *.elf 5 | *.img 6 | *.map 7 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | \.d$ 2 | \.o$ 3 | \.swp$ 4 | ^kernel.elf$ 5 | ^link.map$ 6 | ^disk.img$ 7 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | ifneq ($(shell uname -m), i386) 2 | # -m32 instructs the compiler to produce 32bit code 3 | ccemu = -m32 4 | 5 | ifeq ($(shell uname -s), FreeBSD) 6 | ldemu = -m elf_i386_fbsd 7 | 
else 8 | ldemu = -m elf_i386 9 | endif 10 | endif 11 | 12 | # collect all of our C and assembly source files 13 | csrc = $(wildcard src/boot/*.c) $(wildcard src/*.c) $(wildcard src/klibc/*.c) 14 | asmsrc = $(wildcard src/boot/*.S) $(wildcard src/*.S) $(wildcard src/klibc/*.S) 15 | dep = $(asmsrc:.S=.d) $(csrc:.c=.d) 16 | 17 | # each source file will generate one object file 18 | obj = $(asmsrc:.S=.o) $(csrc:.c=.o) 19 | 20 | CC = gcc 21 | 22 | inc = -Isrc -Isrc/klibc -Isrc/boot -Iinclude 23 | 24 | # -nostdinc instructs the compiler to ignore standard include directories 25 | CFLAGS = $(ccemu) -Wall -g -nostdinc -fno-builtin $(inc) -DKERNEL 26 | ASFLAGS = $(ccemu) -g -nostdinc -fno-builtin $(inc) 27 | 28 | bin = kernel.elf 29 | 30 | # default target: make an ELF binary by linking the object files 31 | # we need to specify where to assume the text section (code) is going 32 | # in memory, as well as the kernel entry point (kentry). 33 | $(bin): $(obj) 34 | ld $(ldemu) -o $@ -Ttext 0x100000 -e kentry $(obj) -Map link.map 35 | 36 | %.s: %.c 37 | $(CC) $(CFLAGS) -S -o $@ $< 38 | 39 | -include $(dep) 40 | 41 | %.d: %.c 42 | @$(CPP) $(CFLAGS) -MM -MT $(@:.d=.o) $< >$@ 43 | 44 | %.d: %.S 45 | @$(CPP) $(ASFLAGS) -MM -MT $(@:.d=.o) $< >$@ 46 | 47 | .PHONY: clean 48 | clean: 49 | rm -f $(obj) $(bin) 50 | 51 | .PHONY: cleandep 52 | cleandep: 53 | rm -f $(dep) 54 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | A minimal UNIX kernel project for x86 PCs 2 | ----------------------------------------- 3 | Author: John Tsiombikas 4 | 5 | License: GNU GPL v3 or any later version published by the free software 6 | foundation. See COPYING for details. 
7 | 8 | About 9 | ----- 10 | When the editor of the Linux Inside magazine asked me to write for his newborn 11 | magazine, I leapt at the chance to start a series of introductory articles on 12 | kernel programming. 13 | For this reason I started writing a minimal, UNIX-like, operating system kernel 14 | from scratch. It's not complete yet, but it already has user/kernel protection 15 | levels, a virtual memory system with copy-on-write, processes, task switching, 16 | etc. 17 | 18 | The articles themselves (only in Greek at the moment, unfortunately) can be found 19 | at: http://nuclear.mutantstargoat.com/articles/kerneldev/index.html 20 | 21 | 22 | Compiling & Running 23 | ------------------- 24 | To compile the kernel image (kernel.elf) just type make (or gmake, if your default 25 | make tool is not GNU make). A script called "run" is supplied that will use qemu 26 | to run the kernel in a virtual machine. 27 | 28 | If you wish to boot up the kernel on your computer you need a multiboot 29 | compliant boot loader like GRUB. Since you probably already have GRUB installed 30 | if you're running a GNU/Linux distribution, just add an entry to the GRUB config 31 | file (commonly /boot/grub/menu.lst) similar to this one and reboot: 32 | 33 | title My kernel 34 | root (hdX,Y) # where X is the disk number and Y the partition (first being 0) 35 | kernel /path/to/kernel_code/kernel.elf # change this to the actual path 36 | 37 | In case you are using GRUB 2 instead of the original GRUB, you'll have to 38 | consult the documentation on how to modify the above into the GRUB 2 syntax. 39 | 40 | Disclaimer 41 | ---------- 42 | All programs contain bugs, and this sample code will undoubtedly have its share. 43 | Running buggy kernel code on a real machine is very dangerous and may result 44 | in data loss or even damage to your computer in extreme cases. The author 45 | assumes no responsibility for any such eventuality. Run this code on your 46 | computer at your own risk. 
47 | 48 | Contact 49 | ------- 50 | For any comments or suggestions regarding the articles or this supplemental 51 | code, please contact me at nuclear@member.fsf.org 52 | -------------------------------------------------------------------------------- /debug: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cmdline=`cat run | grep qemu | sed s/^qemu//` 4 | 5 | qemu -s -S $cmdline & 6 | gdb 7 | -------------------------------------------------------------------------------- /fstools/mkfs/Makefile: -------------------------------------------------------------------------------- 1 | ksrc = ../../src 2 | 3 | obj = mkfs.o fs.o 4 | dep = $(obj:.o=.d) 5 | bin = mkfs 6 | 7 | CC = gcc 8 | CFLAGS = -pedantic -Wall -g -I$(ksrc) 9 | 10 | $(bin): $(obj) 11 | $(CC) -o $@ $(obj) $(LDFLAGS) 12 | 13 | -include $(dep) 14 | 15 | fs.o: $(ksrc)/fs.c 16 | $(CC) $(CFLAGS) -c $< -o $@ 17 | 18 | %.d: %.c 19 | @$(CPP) $(CFLAGS) $< -MM -MT $(@:.d=.o) >$@ 20 | 21 | .PHONY: clean 22 | clean: 23 | rm -f $(obj) $(bin) $(dep) 24 | -------------------------------------------------------------------------------- /fstools/mkfs/mkfs.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #ifdef __linux__ 11 | #include 12 | #endif 13 | #ifdef __darwin__ 14 | #include 15 | #endif 16 | #include "fs.h" 17 | 18 | int mkfs(int fd, int blksize, uint32_t nblocks); 19 | uint32_t get_block_count(int fd, int blksize); 20 | int user_readblock(int dev, uint32_t blk, void *buf); 21 | int user_writeblock(int dev, uint32_t blk, void *buf); 22 | int parse_args(int argc, char **argv); 23 | 24 | int fd; 25 | uint32_t num_blocks; 26 | 27 | int main(int argc, char **argv) 28 | { 29 | if(parse_args(argc, argv) == -1) { 30 | return 1; 31 | } 32 | 33 | if((num_blocks = get_block_count(fd, BLKSZ)) == 0) { 34 | 
fprintf(stderr, "could not determine the number of blocks\n"); 35 | return 1; 36 | } 37 | printf("total blocks: %u\n", (unsigned int)num_blocks); 38 | 39 | if(mkfs(fd, num_blocks) == -1) { 40 | return 1; 41 | } 42 | 43 | return 0; 44 | } 45 | 46 | int mkfs(int fd, int blksize, uint32_t nblocks) 47 | { 48 | struct superblock *sb; 49 | 50 | sb = malloc(BLKSZ); 51 | assert(sb); 52 | 53 | sb->magic = MAGIC; 54 | sb->ver = 0; 55 | sb->num_blocks = nblocks; 56 | } 57 | 58 | uint32_t get_block_count(int fd, int blksize) 59 | { 60 | unsigned long sz = 0; 61 | uint64_t sz64 = 0; 62 | struct stat st; 63 | 64 | #ifdef BLKGETSIZE64 65 | if(ioctl(fd, BLKGETSIZE64, &sz64) != -1) { 66 | return sz64 / blksize; 67 | } 68 | #endif 69 | 70 | #ifdef BLKGETSIZE 71 | if(ioctl(fd, BLKGETSIZE, &sz) != -1) { 72 | return sz / (blksize / 512); 73 | } 74 | #endif 75 | 76 | #ifdef DKIOCGETBLOCKCOUNT 77 | if(ioctl(fd, DKIOCGETBLOCKCOUNT, &sz64) != -1) { 78 | return sz64 / (blksize / 512); 79 | } 80 | #endif 81 | 82 | if(fstat(fd, &st) != -1 && S_ISREG(st.st_mode)) { 83 | return st.st_size / blksize; 84 | } 85 | 86 | return 0; 87 | } 88 | 89 | int blk_read(void*, uint32_t blk, int count, void *buf) 90 | { 91 | if(lseek(fd, blk * BLKSZ, SEEK_SET) == -1) { 92 | return -1; 93 | } 94 | if(read(fd, buf, BLKSZ * count) < BLKSZ * count) { 95 | return -1; 96 | } 97 | return 0; 98 | } 99 | 100 | int blk_write(void*, uint32_t blk, int count, void *buf) 101 | { 102 | if(lseek(fd, blk * BLKSZ, SEEK_SET) == -1) { 103 | return -1; 104 | } 105 | if(write(fd, buf, BLKSZ * count) < BLKSZ * count) { 106 | return -1; 107 | } 108 | return 0; 109 | } 110 | 111 | int parse_args(int argc, char **argv) 112 | { 113 | int i; 114 | 115 | fd = -1; 116 | 117 | for(i=1; i\n", argv[0]); 122 | exit(0); 123 | 124 | default: 125 | goto invalid; 126 | } 127 | } else { 128 | if(fd != -1) { 129 | goto invalid; 130 | } 131 | 132 | if((fd = open(argv[i], O_RDWR)) == -1) { 133 | fprintf(stderr, "failed to open %s: %s\n", argv[i], 
strerror(errno)); 134 | return -1; 135 | } 136 | } 137 | } 138 | 139 | if(fd == -1) { 140 | fprintf(stderr, "you must specify a device or image file\n"); 141 | return -1; 142 | } 143 | 144 | return 0; 145 | 146 | invalid: 147 | fprintf(stderr, "invalid argument: %s\n", argv[i]); 148 | return -1; 149 | } 150 | -------------------------------------------------------------------------------- /include/kdef.h: -------------------------------------------------------------------------------- 1 | /* definitions that must be in-sync between kernel and user space */ 2 | #ifndef KERNEL_DEFS_H_ 3 | #define KERNEL_DEFS_H_ 4 | 5 | /* --- defines for sys/wait.h */ 6 | #if defined(KERNEL) || defined(KDEF_WAIT_H) 7 | #define WNOHANG 1 8 | 9 | #define WEXITSTATUS(s) ((s) & _WSTATUS_MASK) 10 | #define WCOREDUMP(s) ((s) & _WCORE_BIT) 11 | 12 | #define WIFEXITED(s) (_WREASON(s) == _WREASON_EXITED) 13 | #define WIFSIGNALED(s) (_WREASON(s) == _WREASON_SIGNALED) 14 | 15 | /* implementation details */ 16 | #define _WSTATUS_MASK 0xff 17 | 18 | #define _WREASON_SHIFT 8 19 | #define _WREASON_MASK 0xf00 20 | #define _WREASON(s) (((s) & _WREASON_MASK) >> _WREASON_SHIFT) 21 | 22 | #define _WREASON_EXITED 1 23 | #define _WREASON_SIGNALED 2 24 | 25 | #define _WCORE_BIT 0x1000 26 | #endif /* sys/wait.h */ 27 | 28 | 29 | 30 | /* --- defines for errno.h */ 31 | #if defined(KERNEL) || defined(KDEF_ERRNO_H) 32 | #define EFOO 1 /* I just like to return -1 some times :) */ 33 | 34 | #define EAGAIN 2 35 | #define EINVAL 3 36 | #define ECHILD 4 37 | #define EBUSY 5 38 | #define ENOMEM 6 39 | #define EIO 7 40 | #define ENOENT 8 41 | #define ENAMETOOLONG 9 42 | #define ENOSPC 10 43 | #define EPERM 11 44 | #define ENOTDIR 12 45 | 46 | #define EBUG 127 /* for missing features and known bugs */ 47 | #endif /* errno.h */ 48 | 49 | 50 | /* --- defines for syscall.h */ 51 | #if defined(KERNEL) || defined(KDEF_SYSCALL_H) 52 | 53 | #define SYSCALL_INT 0x80 54 | 55 | #define SYS_HELLO 0 56 | #define SYS_SLEEP 1 57 | 
#define SYS_FORK 2 58 | #define SYS_EXIT 3 59 | #define SYS_WAITPID 4 60 | #define SYS_GETPID 5 61 | #define SYS_GETPPID 6 62 | #define SYS_MOUNT 7 63 | #define SYS_UMOUNT 8 64 | #define SYS_OPEN 9 65 | #define SYS_CLOSE 10 66 | #define SYS_READ 11 67 | #define SYS_WRITE 12 68 | #define SYS_LSEEK 13 69 | 70 | /* keep this one more than the last syscall */ 71 | #define NUM_SYSCALLS 14 72 | 73 | #endif /* syscall.h */ 74 | 75 | /* --- defines for sys/stat.h */ 76 | #if defined(KERNEL) || defined(STAT_H) 77 | 78 | #define S_IFMT 0170000 /* bit mask for the file type bit fields */ 79 | #define S_IFSOCK 0140000 /* socket */ 80 | #define S_IFLNK 0120000 /* symbolic link */ 81 | #define S_IFREG 0100000 /* regular file */ 82 | #define S_IFBLK 0060000 /* block device */ 83 | #define S_IFDIR 0040000 /* directory */ 84 | #define S_IFCHR 0020000 /* character device */ 85 | #define S_IFIFO 0010000 /* FIFO */ 86 | 87 | #define S_ISUID 0004000 /* set UID bit */ 88 | #define S_ISGID 0002000 /* set-group-ID bit (see below) */ 89 | #define S_ISVTX 0001000 /* sticky bit (see below) */ 90 | 91 | #define S_IRWXU 00700 /* mask for file owner permissions */ 92 | #define S_IRUSR 00400 /* owner has read permission */ 93 | #define S_IWUSR 00200 /* owner has write permission */ 94 | #define S_IXUSR 00100 /* owner has execute permission */ 95 | #define S_IRWXG 00070 /* mask for group permissions */ 96 | #define S_IRGRP 00040 /* group has read permission */ 97 | #define S_IWGRP 00020 /* group has write permission */ 98 | #define S_IXGRP 00010 /* group has execute permission */ 99 | #define S_IRWXO 00007 /* mask for permissions for others (not in group) */ 100 | #define S_IROTH 00004 /* others have read permission */ 101 | #define S_IWOTH 00002 /* others have write permission */ 102 | #define S_IXOTH 00001 /* others have execute permission */ 103 | 104 | #endif /* sys/stat.h */ 105 | 106 | 107 | #endif /* KERNEL_DEFS_H_ */ 108 | 
-------------------------------------------------------------------------------- /mkdiskimg: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | SUDO=${SUDO:-sudo} 4 | if [ `id -u` = 0 ]; then 5 | unset SUDO 6 | fi 7 | 8 | # mkdiskimg - prepare a disk image for the kernel 9 | # usage: mkdiskimg [size in mb, default: 40] 10 | 11 | imgfile=disk.img 12 | if [ -e $imgfile ]; then 13 | echo "file '$imgfile' exists, will not overwrite, delete it first" >&2 14 | exit 1 15 | fi 16 | 17 | if [ -n "$1" ]; then 18 | sizemb=$1 19 | else 20 | sizemb=40 21 | fi 22 | 23 | # create the image file 24 | echo 'creating image file ...' 25 | dd if=/dev/zero of=$imgfile bs=1M count=$sizemb || exit 1 26 | 27 | mkpart_linux() 28 | { 29 | sfdisk -q $imgfile <&2 39 | exit 1 40 | fi 41 | 42 | $SUDO fdisk -q -f - /dev/$devfile <&2 63 | exit 1 64 | fi 65 | 66 | echo 67 | echo 'done. happy hacking!' 68 | -------------------------------------------------------------------------------- /run: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | qemu-system-i386 -kernel kernel.elf -soundhw pcspk -net none $* 4 | -------------------------------------------------------------------------------- /src/asmops.h: -------------------------------------------------------------------------------- 1 | #ifndef ASMOPS_H_ 2 | #define ASMOPS_H_ 3 | 4 | #include 5 | 6 | /* general purpose registers as they are pushed by pusha */ 7 | struct registers { 8 | uint32_t edi, esi, ebp, esp; 9 | uint32_t ebx, edx, ecx, eax; 10 | } __attribute__ ((packed)); 11 | 12 | #define enable_intr() asm volatile("sti") 13 | #define disable_intr() asm volatile("cli") 14 | #define halt_cpu() asm volatile("hlt") 15 | 16 | #define push_regs() asm volatile("pusha"); 17 | #define pop_regs() asm volatile("popa"); 18 | 19 | #define inb(dest, port) asm volatile( \ 20 | "inb %1, %0\n\t" \ 21 | : "=a" ((unsigned char)(dest)) \ 22 | : "dN" 
((unsigned short)(port))) 23 | 24 | #define inw(dest, port) asm volatile( \ 25 | "inw %1, %0\n\t" \ 26 | : "=a" ((unsigned short)(dest)) \ 27 | : "dN" ((unsigned short)(port))) 28 | 29 | #define inl(dest, port) asm volatile( \ 30 | "inl %1, %0\n\t" \ 31 | : "=a" ((unsigned long)(dest)) \ 32 | : "dN" ((unsigned short)(port))) 33 | 34 | #define outb(src, port) asm volatile( \ 35 | "outb %0, %1\n\t" \ 36 | :: "a" ((unsigned char)(src)), "dN" ((unsigned short)(port))) 37 | 38 | #define outw(src, port) asm volatile( \ 39 | "outw %0, %1\n\t" \ 40 | :: "a" ((unsigned short)(src)), "dN" ((unsigned short)(port))) 41 | 42 | #define outl(src, port) asm volatile( \ 43 | "outl %0, %1\n\t" \ 44 | :: "a" ((unsigned long)(src)), "dN" ((unsigned short)(port))) 45 | 46 | #define iodelay() outb(0, 0x80) 47 | 48 | 49 | #endif /* ASMOPS_H_ */ 50 | -------------------------------------------------------------------------------- /src/ata.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "ata.h" 8 | #include "intr.h" 9 | #include "asmops.h" 10 | #include "sched.h" 11 | #include "mutex.h" 12 | 13 | /* registers */ 14 | #define REG_DATA 0 /* R/W */ 15 | #define REG_ERROR 1 /* R */ 16 | #define REG_FEATURES 1 /* W */ 17 | #define REG_COUNT 2 /* R/W */ 18 | #define REG_LBA0 3 /* R/W */ 19 | #define REG_LBA1 4 /* R/W */ 20 | #define REG_LBA2 5 /* R/W */ 21 | #define REG_DEVICE 6 /* R/W */ 22 | #define REG_CMD 7 /* W */ 23 | #define REG_STATUS 7 /* R */ 24 | 25 | #define REG_CTL 518 26 | #define REG_ALTSTAT 518 27 | 28 | /* status bit fields */ 29 | #define ST_ERR (1 << 0) 30 | #define ST_DRQ (1 << 3) 31 | #define ST_DRDY (1 << 6) 32 | #define ST_BSY (1 << 7) 33 | 34 | /* device select bit in control register */ 35 | #define DEV_SEL(x) (((x) & 1) << 4) 36 | #define DEV_LBA (1 << 6) 37 | 38 | /* ATA commands */ 39 | #define CMD_IDENTIFY 0xec 40 | #define CMD_READ 0x20 41 | 
#define CMD_READ48 0x24 42 | #define CMD_WRITE 0x30 43 | #define CMD_WRITE48 0x34 44 | 45 | 46 | struct device { 47 | int id; /* id of the device on its ATA interface (0 master, 1 slave) */ 48 | int iface; /* ATA interface for this device (0 or 1) */ 49 | int port_base; /* interface I/O port base */ 50 | 51 | uint32_t nsect_lba; 52 | uint64_t nsect_lba48; 53 | }; 54 | 55 | 56 | static int readwrite_pio(int devno, uint64_t sect, void *buf, void (*rwdata)(struct device*, void*)); 57 | static int identify(struct device *dev, int iface, int id); 58 | static void select_dev(struct device *dev); 59 | static int wait_busy(struct device *dev); 60 | static int wait_drq(struct device *dev); 61 | static void read_data(struct device *dev, void *buf); 62 | static void write_data(struct device *dev, void *buf); 63 | static inline uint8_t read_reg8(struct device *dev, int reg); 64 | static inline uint16_t read_reg16(struct device *dev, int reg); 65 | static inline void write_reg8(struct device *dev, int reg, uint8_t val); 66 | static inline void write_reg16(struct device *dev, int reg, uint16_t val); 67 | static void ata_intr(int inum); 68 | static void *atastr(void *res, void *src, int n); 69 | static char *size_str(uint64_t nsect, char *buf); 70 | static void print_error(int devid, int wr, uint32_t high, uint32_t low, unsigned char err); 71 | 72 | /* last drive selected on each bus */ 73 | static int drvsel[2] = {-1, -1}; 74 | 75 | /* 4 possible devices: 2 ATA interfaces with 2 devices each. 76 | * this will never change unless we start querying the PCI config space 77 | * for additional drives (in which case this whole init code must be 78 | * rewritten anyway), but I like it spelt out like this. 79 | */ 80 | #define MAX_IFACES 2 81 | #define MAX_DEV (MAX_IFACES * 2) 82 | static struct device devices[MAX_DEV]; 83 | static int ndev; 84 | 85 | /* This serves as a sync point for I/O. While the mutex is held, 86 | * some process is doing I/O and all the others must wait. 
87 | */ 88 | static mutex_t pending; 89 | 90 | 91 | void init_ata(void) 92 | { 93 | int i; 94 | 95 | interrupt(IRQ_TO_INTR(15), ata_intr); 96 | 97 | ndev = 0; 98 | for(i=0; insect_lba48) { 118 | return dev->nsect_lba48; 119 | } 120 | return dev->nsect_lba; 121 | } 122 | 123 | int ata_read_pio(int devno, uint64_t sect, void *buf) 124 | { 125 | return readwrite_pio(devno, sect, buf, read_data); 126 | } 127 | 128 | int ata_write_pio(int devno, uint64_t sect, void *buf) 129 | { 130 | return readwrite_pio(devno, sect, buf, write_data); 131 | } 132 | 133 | static int readwrite_pio(int devno, uint64_t sect, void *buf, void (*rwdata)(struct device*, void*)) 134 | { 135 | int use_irq, cmd, st, res = -1; 136 | uint32_t sect_low, sect_high; 137 | struct device *dev = devices + devno; 138 | 139 | if(dev->id == -1) { 140 | return -1; 141 | } 142 | use_irq = get_current_proc() != 0; 143 | 144 | if(use_irq) { 145 | /* wait for the interface to become available */ 146 | mutex_lock(&pending); 147 | } 148 | 149 | select_dev(dev); 150 | 151 | /* LBA48 requires the high-order bits first */ 152 | if(sect >= dev->nsect_lba) { 153 | sect_high = (uint32_t)(sect >> 24); 154 | sect_low = (uint32_t)sect & 0xffffff; 155 | 156 | if(sect >= dev->nsect_lba48) { 157 | goto end; 158 | } 159 | cmd = CMD_READ48; 160 | 161 | write_reg8(dev, REG_COUNT, 0); 162 | write_reg8(dev, REG_LBA0, sect_high & 0xff); 163 | write_reg8(dev, REG_LBA1, (sect_high >> 8) & 0xff); 164 | write_reg8(dev, REG_LBA2, (sect_high >> 16) & 0xff); 165 | } else { 166 | cmd = CMD_READ; 167 | sect_high = 0; 168 | sect_low = (uint32_t)sect & 0xfffffff; 169 | } 170 | 171 | write_reg8(dev, REG_COUNT, 1); 172 | write_reg8(dev, REG_LBA0, sect_low & 0xff); 173 | write_reg8(dev, REG_LBA1, (sect_low >> 8) & 0xff); 174 | write_reg8(dev, REG_LBA2, (sect_low >> 16) & 0xff); 175 | write_reg8(dev, REG_DEVICE, ((sect_low >> 24) & 0xf) | DEV_LBA | DEV_SEL(dev->id)); 176 | /* execute */ 177 | write_reg8(dev, REG_CMD, cmd); 178 | 179 | /* wait for 
the data to become available */ 180 | do { 181 | if(use_irq) { 182 | /* also sleep on the mutex if we're called from userspace */ 183 | wait(&pending); 184 | } 185 | } while(((st = read_reg8(dev, REG_ALTSTAT)) & (ST_DRQ | ST_ERR)) == 0); 186 | 187 | if(st & ST_ERR) { 188 | //print_error(int devid, int wr, uint32_t high, uint32_t low, unsigned char err); 189 | unsigned char err; 190 | 191 | err = read_reg8(dev, REG_ERROR); 192 | print_error(devno, 0, sect_high, sect_low, err); 193 | goto end; 194 | } 195 | 196 | /* read/write the data and we're done */ 197 | rwdata(dev, buf); 198 | res = 0; 199 | end: 200 | if(use_irq) { 201 | mutex_unlock(&pending); 202 | } 203 | return res; 204 | } 205 | 206 | static int identify(struct device *dev, int iface, int id) 207 | { 208 | /* base address of the two ATA interfaces */ 209 | static const int port_base[] = {0x1f0, 0x170}; 210 | unsigned char st; 211 | uint16_t *info; 212 | char textbuf[42]; /* at most we need 40 chars for ident strings */ 213 | 214 | dev->id = id; 215 | dev->iface = iface; 216 | dev->port_base = port_base[iface]; 217 | 218 | /* a status of 0xff means there's no drive on the interface */ 219 | if((st = read_reg8(dev, REG_ALTSTAT)) == 0xff) { 220 | return -1; 221 | } 222 | 223 | select_dev(dev); 224 | 225 | write_reg8(dev, REG_CMD, CMD_IDENTIFY); 226 | 227 | if(!(st = read_reg8(dev, REG_ALTSTAT)) || (st & ST_ERR)) { 228 | /* does not exist */ 229 | return -1; 230 | } 231 | if(wait_busy(dev) == -1) { 232 | /* got ST_ERR, not ATA */ 233 | return -1; 234 | } 235 | 236 | info = malloc(512); 237 | assert(info); 238 | 239 | /* read the device information */ 240 | read_data(dev, info); 241 | 242 | /* print model and serial */ 243 | printf("ata%d: %s", (dev->iface << 1) | dev->id, atastr(textbuf, info + 27, 40)); 244 | printf(" [s/n: %s]", atastr(textbuf, info + 10, 20)); 245 | 246 | dev->nsect_lba = *(uint32_t*)(info + 60); 247 | dev->nsect_lba48 = *(uint64_t*)(info + 100) & 0xffffffffffffull; 248 | 249 | 
if(!dev->nsect_lba) { 250 | printf(" drive does not support LBA, ignoring!\n"); 251 | free(info); 252 | return -1; 253 | } 254 | 255 | if(dev->nsect_lba48) { 256 | size_str(dev->nsect_lba48, textbuf); 257 | } else { 258 | size_str(dev->nsect_lba, textbuf); 259 | } 260 | printf(" size: %s\n", textbuf); 261 | 262 | free(info); 263 | return 0; 264 | } 265 | 266 | static void select_dev(struct device *dev) 267 | { 268 | /* if this is the currently selected device, thy work is done */ 269 | if(drvsel[dev->iface] == dev->id) 270 | return; 271 | 272 | /* wait for BSY and DRQ to clear */ 273 | while(read_reg8(dev, REG_ALTSTAT) & (ST_BSY | ST_DRQ)); 274 | 275 | /* set the correct device bit to the device register */ 276 | write_reg8(dev, REG_DEVICE, DEV_SEL(dev->id)); 277 | 278 | /* wait a bit to allow the device time to respond */ 279 | iodelay(); iodelay(); iodelay(); iodelay(); 280 | } 281 | 282 | static int wait_busy(struct device *dev) 283 | { 284 | unsigned char st; 285 | 286 | do { 287 | st = read_reg8(dev, REG_ALTSTAT); 288 | } while((st & ST_BSY) && !(st & ST_ERR)); 289 | 290 | return st & ST_ERR ? -1 : 0; 291 | } 292 | 293 | static int wait_drq(struct device *dev) 294 | { 295 | unsigned char st; 296 | 297 | do { 298 | st = read_reg8(dev, REG_ALTSTAT); 299 | } while(!(st & (ST_DRQ | ST_ERR))); 300 | 301 | return st & ST_ERR ? 
/* Formats a size given in 512-byte sectors as a human readable string
 * ("<whole>.<hundredths><unit>") into `buf` and returns `buf`.
 *
 * The value starts out in kilobytes and is divided by 1024 until it drops
 * below 1024 or the largest known unit (pb) is reached. `buf` must have room
 * for the result; the longest output is well under 16 characters.
 */
static char *size_str(uint64_t nsect, char *buf)
{
	static const char *suffix[] = {"kb", "mb", "gb", "tb", "pb", 0};
	int i;
	/* must start at 0: the loop below does not run for sizes under 1024kb */
	unsigned int rem = 0;

	/* start with kilobytes */
	nsect /= 2;

	for(i=0; nsect >= 1024 && suffix[i + 1]; i++) {
		rem = nsect % 1024;
		nsect /= 1024;
	}

	/* rem is in 1/1024ths; print it as two zero-padded decimal digits so
	 * that e.g. 1 + 51/1024 reads "1.04" and not "1.4"
	 */
	sprintf(buf, "%u.%02u%s", (unsigned int)nsect, 100 * rem / 1024, suffix[i]);
	return buf;
}
| bdev = malloc(sizeof *bdev); 23 | assert(bdev); 24 | 25 | bdev->ata_dev = devno; 26 | 27 | if(part) { 28 | struct partition *plist = get_part_list(devno); 29 | assert(plist); 30 | 31 | for(i=1; inext; 34 | } 35 | if(!plist) { 36 | free(bdev); 37 | free_part_list(plist); 38 | return 0; 39 | } 40 | 41 | bdev->offset = SECT_TO_BLK(plist->start_sect); 42 | bdev->size = SECT_TO_BLK(plist->size_sect); 43 | bdev->ptype = get_part_type(plist); 44 | 45 | free_part_list(plist); 46 | } else { 47 | bdev->offset = 0; 48 | bdev->size = SECT_TO_BLK(ata_num_sectors(devno)); 49 | bdev->ptype = 0; 50 | } 51 | 52 | return bdev; 53 | } 54 | 55 | void blk_close(struct block_device *bdev) 56 | { 57 | free(bdev); 58 | } 59 | 60 | #define NSECT (BLKSZ / 512) 61 | 62 | int blk_read(struct block_device *bdev, uint32_t blk, int count, void *buf) 63 | { 64 | int i; 65 | char *ptr = buf; 66 | uint32_t sect = blk * NSECT + bdev->offset; 67 | 68 | for(i=0; iata_dev, sect++, ptr) == -1) { 70 | return -1; 71 | } 72 | ptr += 512; 73 | } 74 | return 0; 75 | } 76 | 77 | int blk_write(struct block_device *bdev, uint32_t blk, int count, void *buf) 78 | { 79 | int i; 80 | char *ptr = buf; 81 | uint32_t sect = blk * NSECT + bdev->offset; 82 | 83 | for(i=0; iata_dev, sect++, ptr) == -1) { 85 | return -1; 86 | } 87 | ptr += 512; 88 | } 89 | return 0; 90 | } 91 | 92 | dev_t bdev_by_name(const char *name) 93 | { 94 | int minor; 95 | int atadev, part = 0; 96 | 97 | char *tmp = strrchr(name, '/'); 98 | if(tmp) { 99 | name = tmp + 1; 100 | } 101 | 102 | if(strstr(name, "ata") != name) { 103 | return 0; 104 | } 105 | name += 3; 106 | 107 | atadev = strtol(name, &tmp, 10); 108 | if(tmp == name) { 109 | return 0; 110 | } 111 | name = tmp; 112 | 113 | if(*name++ == 'p') { 114 | part = strtol(name, &tmp, 10) + 1; 115 | if(tmp == name) { 116 | return 0; 117 | } 118 | } 119 | 120 | minor = MKMINOR(atadev, part); 121 | return DEVNO(1, minor); 122 | } 123 | 
-------------------------------------------------------------------------------- /src/bdev.h: -------------------------------------------------------------------------------- 1 | #ifndef BDEV_H_ 2 | #define BDEV_H_ 3 | 4 | #include "fs.h" /* for dev_t */ 5 | 6 | /* TODO buffer cache */ 7 | 8 | struct block_device { 9 | int ata_dev; 10 | uint32_t offset, size; 11 | 12 | /* Partition type (if the blkdev is a partition), otherwise 0. 13 | * Used as just an extra sanity check to make sure we don't 14 | * try to mount the wrong filesystem. 15 | */ 16 | int ptype; 17 | }; 18 | 19 | struct block_device *blk_open(dev_t dev); 20 | void blk_close(struct block_device *bdev); 21 | 22 | int blk_read(struct block_device *bdev, uint32_t blk, int count, void *buf); 23 | int blk_write(struct block_device *bdev, uint32_t blk, int count, void *buf); 24 | 25 | dev_t bdev_by_name(const char *name); 26 | 27 | #endif /* BDEV_H_ */ 28 | -------------------------------------------------------------------------------- /src/boot/mboot.S: -------------------------------------------------------------------------------- 1 | #define MAGIC 0x1badb002 2 | /* flags with bit 1 set means we need memory info */ 3 | #define FLAGS 2 4 | #define STACK_SIZE 0x4000 5 | 6 | .text 7 | .align 4 8 | 9 | /* multiboot header */ 10 | .long MAGIC 11 | .long FLAGS 12 | .long -(MAGIC + FLAGS) /* checksum */ 13 | 14 | .globl kentry 15 | kentry: 16 | /* setup a temporary kernel stack */ 17 | movl $(stack + STACK_SIZE), %esp 18 | /* reset eflags register */ 19 | pushl $0 20 | popf 21 | /* call the kernel main function. 
ebx points to the 22 | * multiboot information structure */ 23 | push %ebx 24 | call kmain 25 | /* we dropped out of main, halt the CPU */ 26 | cli 27 | hlt 28 | 29 | /* space for the temporary kernel stack */ 30 | .comm stack, STACK_SIZE 31 | -------------------------------------------------------------------------------- /src/boot/mboot.h: -------------------------------------------------------------------------------- 1 | #ifndef MBOOT_H_ 2 | #define MBOOT_H_ 3 | 4 | #include 5 | 6 | #define MB_MEM (1 << 0) 7 | #define MB_BOOTDEV (1 << 1) 8 | #define MB_CMDLINE (1 << 2) 9 | #define MB_MODULES (1 << 3) 10 | #define MB_AOUT_SYM (1 << 4) 11 | #define MB_ELF_SHDR (1 << 5) 12 | #define MB_MMAP (1 << 6) 13 | #define MB_DRIVES (1 << 7) 14 | #define MB_CFGTAB (1 << 8) 15 | #define MB_LDRNAME (1 << 9) 16 | #define MB_APM (1 << 10) 17 | #define MB_GFX (1 << 11) 18 | 19 | #define MB_MEM_VALID 1 20 | #define MB_DRIVE_CHS 0 21 | #define MB_DRIVE_LBA 1 22 | 23 | struct mboot_module { 24 | uint32_t start_addr, end_addr; 25 | char *str; 26 | uint32_t reserved; 27 | }; 28 | 29 | struct mboot_elf_shdr_table { 30 | uint32_t num; 31 | uint32_t size; 32 | uint32_t addr; 33 | uint32_t shndx; 34 | }; 35 | 36 | struct mboot_mmap { 37 | uint32_t skip; 38 | uint32_t base_low, base_high; 39 | uint32_t length_low, length_high; 40 | uint32_t type; 41 | }; 42 | 43 | struct mboot_drive { 44 | uint32_t size; 45 | uint8_t id; 46 | uint8_t mode; 47 | uint16_t cyl; 48 | uint8_t heads, sect; 49 | uint16_t ports[1]; /* zero-terminated */ 50 | } __attribute__ ((packed)); 51 | 52 | struct mboot_apm { 53 | uint16_t ver; 54 | uint16_t cseg; 55 | uint32_t offs; 56 | uint16_t cseg16; 57 | uint16_t dseg; 58 | uint16_t flags; 59 | uint16_t cseg_len; 60 | uint16_t cseg16_len; 61 | uint16_t dseg_len; 62 | } __attribute__ ((packed)); 63 | 64 | struct mboot_vbe { 65 | uint32_t ctl_info; 66 | uint32_t mode_info; 67 | uint16_t mode; 68 | uint16_t ifseg, ifoffs, iflen; 69 | } __attribute__ ((packed)); 70 | 71 | 
72 | /* multiboot information structure */ 73 | struct mboot_info { 74 | uint32_t flags; 75 | /* mem_lower: available low memory (up to 640kb) 76 | * mem_upper: available upper memory (from 1mb and upwards) 77 | */ 78 | uint32_t mem_lower, mem_upper; 79 | /* boot device fields: MSB -> [part3|part2|part1|drive] <- LSB */ 80 | uint32_t boot_dev; 81 | char *cmdline; 82 | /* loaded kernel modules */ 83 | uint32_t mods_count; 84 | struct mboot_module *mods; 85 | /* elf sections table */ 86 | struct mboot_elf_shdr_table elf; 87 | /* memory map */ 88 | uint32_t mmap_len; 89 | struct mboot_mmap *mmap; 90 | /* drives table */ 91 | uint32_t drives_len; 92 | struct mboot_drive *drives; 93 | /* address of BIOS ROM configuration table */ 94 | uint32_t cfgtable; 95 | char *boot_loader_name; 96 | /* advanced power management */ 97 | struct mboot_apm *apm; 98 | /* video bios extensions */ 99 | struct mboot_vbe vbe; 100 | }; 101 | 102 | 103 | #endif /* MBOOT_H_ */ 104 | -------------------------------------------------------------------------------- /src/config.h: -------------------------------------------------------------------------------- 1 | #ifndef _CONFIG_H_ 2 | #define _CONFIG_H_ 3 | 4 | /* frequency of generated timer ticks in hertz */ 5 | #define TICK_FREQ_HZ 250 6 | 7 | #define TIMESLICE 100 8 | #define TIMESLICE_TICKS (TIMESLICE * TICK_FREQ_HZ / 1000) 9 | 10 | /* allow automatic user stack growth by at most 1024 pages at a time (4mb) */ 11 | #define USTACK_MAXGROW 1024 12 | 13 | /* per-process kernel stack size (2 pages) */ 14 | #define KERN_STACK_SIZE 8192 15 | 16 | #endif /* _CONFIG_H_ */ 17 | -------------------------------------------------------------------------------- /src/desc.h: -------------------------------------------------------------------------------- 1 | #ifndef DESC_H_ 2 | #define DESC_H_ 3 | 4 | #include 5 | 6 | typedef struct { 7 | uint16_t d[4]; 8 | } desc_t; 9 | 10 | #endif /* DESC_H_ */ 11 | 
/* Allocation bitmaps are arrays of 32bit words: element x is kept in bit
 * BM_BIT(x) of word BM_IDX(x), and a set bit means "in use".
 *
 * The mask is built from an unsigned constant, because shifting a signed 1
 * into bit 31 is undefined behaviour in C.
 *
 * NOTE: the argument x is expanded more than once, so it must not have
 * side effects.
 */
#define BM_IDX(x)			((x) / 32)
#define BM_BIT(x)			((x) & 0x1f)
#define BM_MASK(x)			(1u << BM_BIT(x))

#define BM_ISFREE(bm, x)	(((bm)[BM_IDX(x)] & BM_MASK(x)) == 0)
#define BM_SET(bm, x)		((bm)[BM_IDX(x)] |= BM_MASK(x))
#define BM_CLR(bm, x)		((bm)[BM_IDX(x)] &= ~BM_MASK(x))
static int find_free(uint32_t *bm, int sz); 40 | static int alloc_inode(struct filesys *fs); 41 | #define free_inode(fs, ino) BM_CLR((fs)->sb->ibm, (ino)) 42 | static int alloc_block(struct filesys *fs); 43 | #define free_block(fs, bno) BM_CLR((fs)->sb->bm, (bno)) 44 | #define zero_block(fs, bno) \ 45 | do { \ 46 | assert(bno > 0); \ 47 | blk_write((fs)->bdev, (bno), 1, (fs)->zeroblock); \ 48 | } while(0) 49 | 50 | static int file_block(struct filesys *fs, struct inode *node, int boffs, int allocate); 51 | #define get_file_block(fs, node, boffs) file_block(fs, node, boffs, 0) 52 | #define alloc_file_block(fs, node, boffs) file_block(fs, node, boffs, 1) 53 | 54 | 55 | int openfs(struct filesys *fs, dev_t dev) 56 | { 57 | int res; 58 | struct block_device *bdev; 59 | 60 | assert(BLKSZ % sizeof(struct inode) == 0); 61 | 62 | if(!(bdev = blk_open(dev))) { 63 | return -ENOENT; 64 | } 65 | fs->bdev = bdev; 66 | 67 | /* read the superblock */ 68 | if(!(fs->sb = malloc(BLKSZ))) { 69 | blk_close(bdev); 70 | return -ENOMEM; 71 | } 72 | if((res = read_superblock(fs)) != 0) { 73 | blk_close(bdev); 74 | return res; 75 | } 76 | 77 | /* allocate the zero-block buffer written to zero-out blocks */ 78 | if(!(fs->zeroblock = malloc(fs->sb->blksize))) { 79 | blk_close(bdev); 80 | free(fs->sb->ibm); 81 | free(fs->sb->bm); 82 | free(fs->sb->root); 83 | return -ENOMEM; 84 | } 85 | memset(fs->zeroblock, 0xff, fs->sb->blksize); 86 | 87 | return 0; 88 | } 89 | 90 | int mkfs(struct filesys *fs, dev_t dev) 91 | { 92 | struct superblock *sb; 93 | struct block_device *bdev; 94 | int i, bcount; 95 | 96 | if(!(bdev = blk_open(dev))) { 97 | return -1; 98 | } 99 | fs->bdev = bdev; 100 | 101 | if(!(sb = malloc(BLKSZ))) { 102 | blk_close(bdev); 103 | return -1; 104 | } 105 | fs->sb = sb; 106 | 107 | /* populate the superblock */ 108 | sb->magic = MAGIC; 109 | sb->ver = FS_VER; 110 | sb->blksize = BLKSZ; 111 | 112 | sb->num_blocks = bdev->size; 113 | sb->num_inodes = sb->num_blocks / 4; 114 | 115 | 
/* inode bitmap just after the superblock */ 116 | sb->ibm_start = 2; 117 | sb->ibm_count = (sb->num_inodes + BLKBITS - 1) / BLKBITS; 118 | /* also allocate and initialize in-memory inode bitmap */ 119 | sb->ibm = malloc(sb->ibm_count * BLKSZ); 120 | assert(sb->ibm); 121 | memset(sb->ibm, 0, sb->ibm_count * BLKSZ); 122 | 123 | /* XXX mark inode 0 as used always */ 124 | BM_SET(sb->ibm, 0); 125 | 126 | /* block bitmap just after the inode bitmap */ 127 | sb->bm_start = sb->ibm_start + sb->ibm_count; 128 | sb->bm_count = (sb->num_blocks + BLKBITS - 1) / BLKBITS; 129 | /* also allocate and initialize in-memory block bitmap */ 130 | sb->bm = malloc(sb->bm_count * BLKSZ); 131 | assert(sb->bm); 132 | memset(sb->bm, 0, sb->bm_count * BLKSZ); 133 | 134 | /* inode table, just after the block bitmap */ 135 | sb->itbl_start = sb->bm_start + sb->bm_count; 136 | sb->itbl_count = (sb->num_inodes * sizeof(struct inode) + BLKSZ - 1) / BLKSZ; 137 | 138 | /* mark all used blocks as used */ 139 | bcount = sb->itbl_start + sb->itbl_count; 140 | memset(sb->bm, 0xff, bcount / 8); 141 | for(i=0; ibm, bit); 144 | } 145 | 146 | /* create the root directory */ 147 | sb->root = newdir(fs, 0); 148 | sb->root_ino = sb->root->ino; 149 | /* and write the inode to disk */ 150 | put_inode(fs, sb->root); 151 | 152 | return 0; 153 | } 154 | 155 | static struct inode *newdir(struct filesys *fs, struct inode *parent) 156 | { 157 | struct inode *dirnode; 158 | 159 | /* allocate and initialize inode */ 160 | if(!(dirnode = malloc(sizeof *dirnode))) { 161 | return 0; 162 | } 163 | memset(dirnode, 0, sizeof *dirnode); 164 | 165 | if((dirnode->ino = alloc_inode(fs)) == -1) { 166 | printf("failed to allocate inode for a new directory\n"); 167 | free(dirnode); 168 | return 0; 169 | } 170 | dirnode->mode = S_IFDIR; 171 | 172 | /* add . and .. links */ 173 | addlink(fs, dirnode, dirnode, "."); 174 | addlink(fs, dirnode, parent ? 
parent : dirnode, ".."); 175 | 176 | return dirnode; 177 | } 178 | 179 | static int addlink(struct filesys *fs, struct inode *target, struct inode *node, const char *name) 180 | { 181 | struct dir_entry ent, *data; 182 | int i, boffs, bidx, len; 183 | 184 | if(!(target->mode & S_IFDIR)) { 185 | return -ENOTDIR; 186 | } 187 | if(node->mode & S_IFDIR) { 188 | return -EPERM; 189 | } 190 | /* TODO check that the link does not already exist (EEXIST) */ 191 | 192 | if((len = strlen(name)) > NAME_MAX) { 193 | return -ENAMETOOLONG; 194 | } 195 | ent.ino = node->ino; 196 | memcpy(ent.name, name, len + 1); 197 | 198 | /* find a place to put it */ 199 | if(!(data = malloc(BLKSZ))) { 200 | return -ENOMEM; 201 | } 202 | 203 | boffs = 0; 204 | while((bidx = get_file_block(fs, target, boffs)) > 0) { 205 | /* read the block, and search for an empty entry */ 206 | blk_read(fs->bdev, bidx, 1, data); 207 | 208 | /* for all directory entries in this block... */ 209 | for(i=0; ibdev, bidx, 1, data); 231 | node->nlink++; /* increase reference count */ 232 | 233 | free(data); 234 | return 0; 235 | } 236 | 237 | 238 | static int read_superblock(struct filesys *fs) 239 | { 240 | struct superblock *sb = fs->sb; 241 | 242 | /* read superblock and verify */ 243 | if(blk_read(fs->bdev, 1, 1, sb) == -1) { 244 | printf("failed to read superblock\n"); 245 | return -EIO; 246 | } 247 | if(sb->magic != MAGIC) { 248 | printf("invalid magic\n"); 249 | return -EINVAL; 250 | } 251 | if(sb->ver > FS_VER) { 252 | printf("invalid version: %d\n", sb->ver); 253 | return -EINVAL; 254 | } 255 | if(sb->blksize != BLKSZ) { 256 | printf("invalid block size: %d\n", sb->blksize); 257 | return -EINVAL; 258 | } 259 | 260 | /* allocate and populate in-memory bitmaps */ 261 | if(!(sb->ibm = malloc(sb->ibm_count * sb->blksize))) { 262 | return -ENOMEM; 263 | } 264 | if(blk_read(fs->bdev, sb->ibm_start, sb->ibm_count, sb->ibm) == -1) { 265 | printf("failed to read inode bitmap\n"); 266 | free(sb->ibm); 267 | return -EIO; 
268 | } 269 | if(!(sb->bm = malloc(sb->bm_count * sb->blksize))) { 270 | free(sb->ibm); 271 | return -ENOMEM; 272 | } 273 | if(blk_read(fs->bdev, sb->bm_start, sb->bm_count, sb->bm) == -1) { 274 | printf("failed to read block bitmap\n"); 275 | free(sb->ibm); 276 | free(sb->bm); 277 | return -EIO; 278 | } 279 | 280 | /* read the root inode */ 281 | if(!(sb->root = malloc(sizeof *sb->root))) { 282 | free(sb->ibm); 283 | free(sb->bm); 284 | return -ENOMEM; 285 | } 286 | if(get_inode(fs, sb->root_ino, sb->root) == -1) { 287 | printf("failed to read root inode\n"); 288 | return -1; 289 | } 290 | 291 | return 0; 292 | } 293 | 294 | static int write_superblock(struct filesys *fs) 295 | { 296 | struct superblock *sb = fs->sb; 297 | 298 | /* write back any changes in the root inode */ 299 | if(put_inode(fs, sb->root) == -1) { 300 | return -1; 301 | } 302 | /* write back the block bitmap */ 303 | if(blk_write(fs->bdev, sb->bm_start, sb->bm_count, sb->bm) == -1) { 304 | return -1; 305 | } 306 | /* write back the inode bitmap */ 307 | if(blk_write(fs->bdev, sb->ibm_start, sb->ibm_count, sb->ibm) == -1) { 308 | return -1; 309 | } 310 | /* write the superblock itself */ 311 | if(blk_write(fs->bdev, 1, 1, sb) == -1) { 312 | return -1; 313 | } 314 | return 0; 315 | } 316 | 317 | /* copy the requested inode from the disk, into the buffer passed in the last arg */ 318 | static int get_inode(struct filesys *fs, int ino, struct inode *inode) 319 | { 320 | struct inode *buf = malloc(BLKSZ); 321 | assert(buf); 322 | 323 | if(blk_read(fs->bdev, fs->sb->itbl_start + ino / BLK_INODES, 1, buf) == -1) { 324 | free(buf); 325 | return -1; 326 | } 327 | memcpy(inode, buf + ino % BLK_INODES, sizeof *inode); 328 | free(buf); 329 | return 0; 330 | } 331 | 332 | /* write the inode to the disk */ 333 | static int put_inode(struct filesys *fs, struct inode *inode) 334 | { 335 | struct inode *buf = malloc(BLKSZ); 336 | assert(buf); 337 | 338 | if(blk_read(fs->bdev, fs->sb->itbl_start + inode->ino / 
BLK_INODES, 1, buf) == -1) { 339 | free(buf); 340 | return -1; 341 | } 342 | memcpy(buf + inode->ino % BLK_INODES, inode, sizeof *inode); 343 | 344 | if(blk_write(fs->bdev, fs->sb->itbl_start + inode->ino / BLK_INODES, 1, buf) == -1) { 345 | free(buf); 346 | return -1; 347 | } 348 | free(buf); 349 | return 0; 350 | } 351 | 352 | /* find a free element in the bitmap and return its number */ 353 | static int find_free(uint32_t *bm, int nbits) 354 | { 355 | int i, j, nwords = nbits / 32; 356 | uint32_t ent = 0; 357 | 358 | for(i=0; i<=nwords; i++) { 359 | if(bm[i] != 0xffffffff) { 360 | for(j=0; j<32; j++) { 361 | if(BM_ISFREE(bm, ent)) { 362 | return ent; 363 | } 364 | ent++; 365 | } 366 | 367 | panic("shouldn't happen (in find_free:fs.c)"); 368 | } else { 369 | ent += 32; 370 | } 371 | } 372 | 373 | return -1; 374 | } 375 | 376 | static int alloc_inode(struct filesys *fs) 377 | { 378 | int ino; 379 | 380 | if((ino = find_free(fs->sb->ibm, fs->sb->num_inodes)) == -1) { 381 | return -1; 382 | } 383 | BM_SET(fs->sb->ibm, ino); 384 | return 0; 385 | } 386 | 387 | static int alloc_block(struct filesys *fs) 388 | { 389 | int bno; 390 | 391 | if((bno = find_free(fs->sb->bm, fs->sb->num_blocks)) == -1) { 392 | return -1; 393 | } 394 | BM_SET(fs->sb->bm, bno); 395 | return 0; 396 | } 397 | 398 | #define BLK_BLKID (BLKSZ / sizeof(blkid)) 399 | #define MAX_IND (NDIRBLK + BLK_BLKID) 400 | #define MAX_DIND (MAX_IND + BLK_BLKID * BLK_BLKID) 401 | 402 | static int file_block(struct filesys *fs, struct inode *node, int boffs, int allocate) 403 | { 404 | int res, idx, node_dirty = 0; 405 | blkid *barr; 406 | 407 | /* out of bounds */ 408 | if(boffs < 0 || boffs >= MAX_DIND) { 409 | return 0; 410 | } 411 | 412 | /* is it a direct block ? 
*/ 413 | if(boffs < NDIRBLK) { 414 | if(!(res = node->blk[boffs]) && allocate) { 415 | res = node->blk[boffs] = alloc_block(fs); 416 | if(res) { 417 | zero_block(fs, res); 418 | /* also write back the modified inode */ 419 | put_inode(fs, node); 420 | } 421 | } 422 | return res; 423 | } 424 | 425 | barr = malloc(fs->sb->blksize); 426 | assert(barr); 427 | 428 | /* is it an indirect block ? */ 429 | if(boffs < MAX_IND) { 430 | int ind_dirty = 0; 431 | 432 | if(node->ind) { 433 | /* read the indirect block */ 434 | blk_read(fs->bdev, node->ind, 1, barr); 435 | } else { 436 | /* does not exist... try to allocate if requested */ 437 | if(!allocate || !(node->ind = alloc_block(fs))) { 438 | res = 0; 439 | goto end; 440 | } 441 | 442 | /* allocated a block clear the buffer, and invalidate everything */ 443 | memset(barr, 0, sizeof fs->sb->blksize); 444 | node_dirty = 1; 445 | ind_dirty = 1; 446 | } 447 | 448 | idx = boffs - NDIRBLK; 449 | 450 | if(!(res = barr[idx])) { 451 | if(allocate && (res = barr[idx] = alloc_block(fs))) { 452 | ind_dirty = 1; 453 | } 454 | } 455 | 456 | /* write back the indirect block if needed */ 457 | if(ind_dirty) { 458 | blk_write(fs->bdev, node->ind, 1, barr); 459 | } 460 | goto end; 461 | } 462 | 463 | /* TODO check/rewrite this */ 464 | #if 0 465 | /* is it a double-indirect block ? 
*/ 466 | if(boffs < MAX_DIND) { 467 | /* first read the dind block and find the index of the ind block */ 468 | if(!node->dind) { 469 | if(allocate) { 470 | /* allocate and zero-out the double indirect block */ 471 | res = node->dind = alloc_block(fs); 472 | if(res) { 473 | zero_block(fs, res); 474 | } 475 | } else { 476 | res = 0; 477 | goto end; 478 | } 479 | } 480 | blk_read(fd->bdev, node->dind, 1, barr); 481 | idx = (boffs - MAX_IND) / BLK_BLKID; 482 | 483 | /* then read the ind block and find the index of the block */ 484 | if(!barr[idx]) { 485 | res = 0; 486 | goto end; 487 | } 488 | blk_read(fd->bdev, barr[idx], 1, barr); 489 | res = barr[(boffs - MAX_IND) % BLK_BLKID]; 490 | } 491 | #endif 492 | 493 | end: 494 | if(node_dirty) { 495 | put_inode(fs, node); 496 | } 497 | free(barr); 498 | return res; 499 | } 500 | -------------------------------------------------------------------------------- /src/fs.h: -------------------------------------------------------------------------------- 1 | #ifndef FS_H_ 2 | #define FS_H_ 3 | 4 | #include 5 | 6 | #define MAGIC 0xccf5ccf5 7 | #define FS_VER 1 8 | #define BLKSZ 1024 9 | 10 | #define NAME_MAX 27 /* +1 termin. 
/* Filesystem superblock. The start/count pairs below locate the on-disk
 * areas of the filesystem; the counts are in blocks of blksize bytes.
 */
struct superblock {
	uint32_t magic;	/* magic number */
	int ver;		/* filesystem version */
	int blksize;	/* only BLKSZ supported at the moment */

	/* total number of blocks */
	unsigned int num_blocks;
	/* total number of inodes */
	unsigned int num_inodes;

	/* inode allocation bitmap start and count */
	blkid ibm_start;
	unsigned int ibm_count;
	/* inode table start and count */
	blkid itbl_start;
	unsigned int itbl_count;
	/* block allocation bitmap start and count */
	blkid bm_start;
	unsigned int bm_count;

	int root_ino;	/* root directory inode number */

	/* the following are valid only at runtime, ignored on disk */
	uint32_t *ibm;	/* in-memory inode bitmap */
	uint32_t *bm;	/* in-memory block bitmap */
	struct inode *root;	/* in-memory root inode */

} __attribute__((packed));
filesys *fs, dev_t dev); 86 | int mkfs(struct filesys *fs, dev_t dev); 87 | void closefs(struct filesys *fs); 88 | int find_inode(const char *path); 89 | 90 | /* defined in fs_sys.c */ 91 | int sys_mount(char *mntpt, char *devname, unsigned int flags); 92 | int sys_umount(char *devname); 93 | 94 | int sys_open(char *pathname, int flags, unsigned int mode); 95 | int sys_close(int fd); 96 | 97 | int sys_read(int fd, void *buf, int sz); 98 | int sys_write(int fd, void *buf, int sz); 99 | long sys_lseek(int fd, long offs, int from); 100 | 101 | 102 | #endif /* FS_H_ */ 103 | -------------------------------------------------------------------------------- /src/fs_sys.c: -------------------------------------------------------------------------------- 1 | /* implementation of the filesystem-related syscalls */ 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "fs.h" 9 | #include "part.h" 10 | #include "panic.h" 11 | #include "bdev.h" 12 | #include "ata.h" 13 | 14 | static dev_t find_rootfs(void); 15 | 16 | /* list of mounted filesystems 17 | * XXX currently only one, the root filesystem 18 | */ 19 | static struct filesys *fslist; 20 | 21 | 22 | int sys_mount(char *mtpt, char *devname, unsigned int flags) 23 | { 24 | dev_t dev; 25 | int err; 26 | struct filesys *fs; 27 | 28 | if(strcmp(mtpt, "/") != 0) { 29 | printf("only root can be mounted at the moment\n"); 30 | return -EBUG; 31 | } 32 | 33 | /* mounting root filesystem */ 34 | if(fslist) { 35 | printf("root already mounted\n"); 36 | return -EBUSY; 37 | } 38 | 39 | if(devname) { 40 | dev = bdev_by_name(devname); 41 | } else { 42 | /* try to autodetect it */ 43 | dev = find_rootfs(); 44 | } 45 | if(!dev) { 46 | err = -ENOENT; 47 | goto rootfail; 48 | } 49 | 50 | if(!(fs = malloc(sizeof *fslist))) { 51 | err = -ENOMEM; 52 | goto rootfail; 53 | } 54 | if((err = openfs(fs, dev)) != 0) { 55 | free(fs); 56 | goto rootfail; 57 | } 58 | 59 | fslist = fs; 60 | return 0; 61 | 62 | rootfail: 63 | 
panic("failed to mount root filesystem: %d\n", -err); 64 | return err; /* unreachable */ 65 | } 66 | 67 | #define PART_TYPE 0xcc 68 | static dev_t find_rootfs(void) 69 | { 70 | dev_t dev = 0; 71 | #if 0 72 | int i, num_dev, partid; 73 | struct partition *plist, *p; 74 | struct superblock *sb = malloc(BLKSZ); 75 | char name[16]; 76 | 77 | assert(sb); 78 | 79 | num_dev = ata_num_devices(); 80 | for(i=0; istart_sect / 2 + 1, BLKSZ, sb); 90 | 91 | if(sb->magic == MAGIC) { 92 | sprintf(name, "ata%dp%d", i, partid); 93 | printf("found root: %s\n", name); 94 | dev = bdev_by_name(name); 95 | break; 96 | } 97 | } 98 | p = p->next; 99 | partid++; 100 | } 101 | free_part_list(plist); 102 | if(dev) break; 103 | } 104 | 105 | free(sb); 106 | #endif 107 | return dev; 108 | } 109 | -------------------------------------------------------------------------------- /src/interrupts.h: -------------------------------------------------------------------------------- 1 | #ifdef ASM 2 | /* included from intr-asm.S */ 3 | #define INTR_ENTRY_EC(n, name) ientry_err n, name 4 | #define INTR_ENTRY_NOEC(n, name) ientry_noerr n, name 5 | #else 6 | /* included from intr.c inside init_intr() */ 7 | #define INTR_ENTRY_EC(n, name) \ 8 | void intr_entry_##name(void); \ 9 | set_intr_entry(n, intr_entry_##name); 10 | #define INTR_ENTRY_NOEC(n, name) INTR_ENTRY_EC(n, name) 11 | #endif /* ASM */ 12 | 13 | /* faults/traps/aborts (plus NMI) */ 14 | INTR_ENTRY_NOEC(0, div) 15 | INTR_ENTRY_NOEC(1, debug) 16 | INTR_ENTRY_NOEC(2, nmi) 17 | INTR_ENTRY_NOEC(3, bp) 18 | INTR_ENTRY_NOEC(4, overflow) 19 | INTR_ENTRY_NOEC(5, bound) 20 | INTR_ENTRY_NOEC(6, ill) 21 | INTR_ENTRY_NOEC(7, nodev) 22 | INTR_ENTRY_EC(8, dfault) 23 | INTR_ENTRY_NOEC(9, copseg) 24 | INTR_ENTRY_EC(10, tss) 25 | INTR_ENTRY_EC(11, segpres) 26 | INTR_ENTRY_EC(12, stack) 27 | INTR_ENTRY_EC(13, prot) 28 | INTR_ENTRY_EC(14, page) 29 | INTR_ENTRY_NOEC(15, reserved) 30 | INTR_ENTRY_NOEC(16, fpu) 31 | INTR_ENTRY_EC(17, align) 32 | INTR_ENTRY_NOEC(18, 
/* get_intr_state()
 * returns 1 if interrupts are enabled, 0 if disabled.
 * the result is left in eax: a copy of eflags is obtained through the stack,
 * and its IF bit is shifted down to bit 0 and isolated. */
	.globl get_intr_state
get_intr_state:
	pushf
	popl %eax
	shr $9, %eax  /* bit 9 of eflags is IF */
	andl $1, %eax
	ret
ientry_err n name 50 | .globl intr_entry_\name 51 | intr_entry_\name: 52 | pushl $\n 53 | jmp intr_entry_common 54 | .endm 55 | 56 | /* interrupt entry without error code macro 57 | * this macro generates an interrupt entry point for the interrupts 58 | * and exceptions which do not include error codes in the stack frame 59 | * it pushes a dummy error code (0), to make the stack frame identical 60 | */ 61 | .macro ientry_noerr n name 62 | .globl intr_entry_\name 63 | intr_entry_\name: 64 | pushl $0 65 | pushl $\n 66 | jmp intr_entry_common 67 | .endm 68 | 69 | /* common code used by all entry points. calls dispatch_intr() 70 | * defined in intr.c 71 | */ 72 | .extern dispatch_intr 73 | intr_entry_common: 74 | /* save the current data segment selectors */ 75 | pushl %gs 76 | pushl %fs 77 | pushl %es 78 | pushl %ds 79 | /* save general purpose registers */ 80 | pusha 81 | /* if we entered from userspace ss and cs is set correctly, but 82 | * we must make sure all the other selectors are set to the 83 | * kernel data segment */ 84 | mov %ss, %eax 85 | mov %eax, %ds 86 | mov %eax, %es 87 | mov %eax, %fs 88 | mov %eax, %gs 89 | call dispatch_intr 90 | intr_ret_local: 91 | /* restore general purpose registers */ 92 | popa 93 | /* restore data segment selectors */ 94 | popl %ds 95 | popl %es 96 | popl %fs 97 | popl %gs 98 | /* remove error code and intr num from stack */ 99 | add $8, %esp 100 | iret 101 | 102 | /* intr_ret is called by context_switch to return from the kernel 103 | * to userspace. The argument is a properly formed intr_frame 104 | * structure with the saved context of the new task. 105 | * 106 | * First thing to do is remove the return address pointing back 107 | * to context_switch, which then leaves us with a proper interrupt 108 | * stack frame, so we can jump right in the middle of the regular 109 | * interrupt return code above. 
110 | */ 111 | .globl intr_ret 112 | intr_ret: 113 | add $4, %esp 114 | jmp intr_ret_local 115 | 116 | /* by including interrupts.h with ASM defined, the macros above 117 | * are expanded to generate all required interrupt entry points 118 | */ 119 | #include 120 | -------------------------------------------------------------------------------- /src/intr.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "intr.h" 3 | #include "desc.h" 4 | #include "segm.h" 5 | #include "asmops.h" 6 | #include "panic.h" 7 | #include "syscall.h" 8 | 9 | /* IDT gate descriptor bits */ 10 | #define GATE_TASK (5 << 8) 11 | #define GATE_INTR (6 << 8) 12 | #define GATE_TRAP (7 << 8) 13 | #define GATE_DEFAULT (1 << 11) 14 | #define GATE_PRESENT (1 << 15) 15 | 16 | /* PIC command and data ports */ 17 | #define PIC1_CMD 0x20 18 | #define PIC1_DATA 0x21 19 | #define PIC2_CMD 0xa0 20 | #define PIC2_DATA 0xa1 21 | 22 | /* PIC initialization command word 1 bits */ 23 | #define ICW1_ICW4_NEEDED (1 << 0) 24 | #define ICW1_SINGLE (1 << 1) 25 | #define ICW1_INTERVAL4 (1 << 2) 26 | #define ICW1_LEVEL (1 << 3) 27 | #define ICW1_INIT (1 << 4) 28 | /* PIC initialization command word 4 bits */ 29 | #define ICW4_8086 (1 << 0) 30 | #define ICW4_AUTO_EOI (1 << 1) 31 | #define ICW4_BUF_SLAVE (1 << 3) /* 1000 */ 32 | #define ICW4_BUF_MASTER (3 << 2) /* 1100 */ 33 | #define ICW4_SPECIAL (1 << 4) 34 | 35 | /* PIC operation command word 2 bits */ 36 | #define OCW2_EOI (1 << 5) 37 | 38 | 39 | static void init_pic(int offset); 40 | static void gate_desc(desc_t *desc, uint16_t sel, uint32_t addr, int dpl, int type); 41 | static void set_intr_entry(int num, void (*handler)(void)); 42 | 43 | /* defined in intr-asm.S */ 44 | void set_idt(uint32_t addr, uint16_t limit); 45 | void intr_entry_default(void); 46 | 47 | /* the IDT (interrupt descriptor table) */ 48 | static desc_t idt[256]; 49 | /* table of handler functions for all interrupts */ 50 | static 
/* Set up interrupt handling: load the IDT register, point every IDT slot
 * to the default entry point with no handler function registered, install
 * the specific entry points listed in interrupts.h, and finally initialize
 * the interrupt controller.
 */
void init_intr(void)
{
	int i;

	/* the limit passed to set_idt is the size of the table minus one */
	set_idt((uint32_t)idt, sizeof idt - 1);

	/* initialize all entry points and interrupt handlers */
	for(i=0; i<256; i++) {
		set_intr_entry(i, intr_entry_default);
		interrupt(i, 0);
	}

	/* by including interrupts.h here (without ASM being defined)
	 * the series of INTR_ENTRY_* macros will be expanded to a series
	 * of function prototypes for all interrupt entry points and the
	 * corresponding calls to set_intr_entry to set up the IDT slots
	 */
#include "interrupts.h"

	/* initialize the programmable interrupt controller
	 * setting up the mapping of IRQs [0, 15] to interrupts [32, 47]
	 */
	init_pic(IRQ_OFFSET);
	eoi_pending = 0;
}
99 | */ 100 | void dispatch_intr(struct intr_frame frm) 101 | { 102 | cur_intr_frame = &frm; 103 | 104 | if(IS_IRQ(frm.inum)) { 105 | eoi_pending = frm.inum; 106 | } 107 | 108 | if(intr_func[frm.inum]) { 109 | intr_func[frm.inum](frm.inum); 110 | } else { 111 | if(frm.inum < 32) { 112 | panic("unhandled exception %d, error code: %d\n", frm.inum, frm.err); 113 | } 114 | printf("unhandled interrupt %d\n", frm.inum); 115 | } 116 | 117 | disable_intr(); 118 | if(eoi_pending) { 119 | end_of_irq(INTR_TO_IRQ(eoi_pending)); 120 | } 121 | } 122 | 123 | static void init_pic(int offset) 124 | { 125 | /* send ICW1 saying we'll follow with ICW4 later on */ 126 | outb(ICW1_INIT | ICW1_ICW4_NEEDED, PIC1_CMD); 127 | outb(ICW1_INIT | ICW1_ICW4_NEEDED, PIC2_CMD); 128 | /* send ICW2 with IRQ remapping */ 129 | outb(offset, PIC1_DATA); 130 | outb(offset + 8, PIC2_DATA); 131 | /* send ICW3 to setup the master/slave relationship */ 132 | /* ... set bit3 = 3rd interrupt input has a slave */ 133 | outb(4, PIC1_DATA); 134 | /* ... 
set slave ID to 2 */ 135 | outb(2, PIC2_DATA); 136 | /* send ICW4 to set 8086 mode (no calls generated) */ 137 | outb(ICW4_8086, PIC1_DATA); 138 | outb(ICW4_8086, PIC2_DATA); 139 | /* done, just reset the data port to 0 */ 140 | outb(0, PIC1_DATA); 141 | outb(0, PIC2_DATA); 142 | } 143 | 144 | static void gate_desc(desc_t *desc, uint16_t sel, uint32_t addr, int dpl, int type) 145 | { 146 | /* first 16bit part is the low 16bits of the entry address */ 147 | desc->d[0] = addr & 0xffff; 148 | /* second 16bit part is the segment selector for the entry code */ 149 | desc->d[1] = sel; 150 | /* third 16bit part has the privilege level, type, and present bit */ 151 | desc->d[2] = ((dpl & 3) << 13) | type | GATE_DEFAULT | GATE_PRESENT; 152 | /* last 16bit part is the high 16bits of the entry address */ 153 | desc->d[3] = (addr & 0xffff0000) >> 16; 154 | } 155 | 156 | #define IS_TRAP(n) ((n) >= 32 && !IS_IRQ(n)) 157 | static void set_intr_entry(int num, void (*handler)(void)) 158 | { 159 | int type = IS_TRAP(num) ? GATE_TRAP : GATE_INTR; 160 | 161 | /* the syscall interrupt has to have a dpl of 3 otherwise calling it from 162 | * user space will raise a general protection exception. All the rest should 163 | * have a dpl of 0 to disallow user programs to execute critical interrupt 164 | * handlers and possibly crashing the system. 165 | */ 166 | int dpl = (num == SYSCALL_INT) ? 
3 : 0; 167 | 168 | gate_desc(idt + num, selector(SEGM_KCODE, 0), (uint32_t)handler, dpl, type); 169 | } 170 | /* acknowledge the pending IRQ, if any, by sending an EOI command to the PIC(s). does nothing when no EOI is pending. */ 171 | void end_of_irq(int irq) 172 | { 173 | int intr_state = get_intr_state(); 174 | disable_intr(); 175 | 176 | if(!eoi_pending) { /* nothing to acknowledge: undo the disable_intr() above before bailing out */ 177 | set_intr_state(intr_state); return; 178 | } 179 | eoi_pending = 0; 180 | 181 | if(irq > 7) { /* IRQs 8-15 also need an EOI on the second PIC */ 182 | outb(OCW2_EOI, PIC2_CMD); 183 | } 184 | outb(OCW2_EOI, PIC1_CMD); 185 | 186 | set_intr_state(intr_state); 187 | } 188 | -------------------------------------------------------------------------------- /src/intr.h: -------------------------------------------------------------------------------- 1 | #ifndef INTR_H_ 2 | #define INTR_H_ 3 | 4 | #include 5 | #include "asmops.h" 6 | 7 | /* offset used to remap IRQ numbers (+32) */ 8 | #define IRQ_OFFSET 32 9 | /* conversion macros between IRQ and interrupt numbers */ 10 | #define IRQ_TO_INTR(x) ((x) + IRQ_OFFSET) 11 | #define INTR_TO_IRQ(x) ((x) - IRQ_OFFSET) 12 | /* checks whether a particular interrupt is a remapped IRQ */ 13 | #define IS_IRQ(n) ((n) >= IRQ_OFFSET && (n) < IRQ_OFFSET + 16) 14 | 15 | /* structure used to pass the interrupt stack frame from the 16 | * entry points to the C dispatch function. 
17 | */ 18 | struct intr_frame { 19 | /* registers pushed by pusha in intr_entry_* */ 20 | struct registers regs; 21 | /* data segment selectors */ 22 | uint32_t ds, es, fs, gs; 23 | /* interrupt number and error code pushed in intr_entry_* */ 24 | uint32_t inum, err; 25 | /* pushed by CPU during interrupt entry */ 26 | uint32_t eip, cs, eflags; 27 | /* pushed by CPU during interrupt entry from user space */ 28 | uint32_t esp, ss; 29 | } __attribute__ ((packed)); 30 | 31 | 32 | 33 | typedef void (*intr_func_t)(int); 34 | 35 | 36 | void init_intr(void); 37 | 38 | struct intr_frame *get_intr_frame(void); 39 | 40 | void interrupt(int intr_num, intr_func_t func); 41 | 42 | /* defined in intr-asm.S */ 43 | int get_intr_state(void); 44 | void set_intr_state(int s); 45 | 46 | void intr_ret(struct intr_frame ifrm); 47 | 48 | void end_of_irq(int irq); 49 | 50 | #endif /* INTR_H_ */ 51 | -------------------------------------------------------------------------------- /src/klibc/assert.h: -------------------------------------------------------------------------------- 1 | #ifndef ASSERT_H_ 2 | #define ASSERT_H_ 3 | 4 | #include "panic.h" 5 | 6 | #define assert(x) \ 7 | if(!(x)) { \ 8 | panic("Kernel assertion failed at " __FILE__ ":%d: " #x "\n", __LINE__); \ 9 | } 10 | 11 | #endif /* ASSERT_H_ */ 12 | -------------------------------------------------------------------------------- /src/klibc/ctype.c: -------------------------------------------------------------------------------- 1 | #include "ctype.h" 2 | 3 | int isalnum(int c) 4 | { 5 | return isalpha(c) || isdigit(c); 6 | } 7 | 8 | int isalpha(int c) 9 | { 10 | return isupper(c) || islower(c); 11 | } 12 | 13 | int isblank(int c) 14 | { 15 | return c == ' ' || c == '\t'; 16 | } 17 | 18 | int isdigit(int c) 19 | { 20 | return c >= '0' && c <= '9'; 21 | } 22 | 23 | int isupper(int c) 24 | { 25 | return c >= 'A' && c <= 'Z'; 26 | } 27 | 28 | int islower(int c) 29 | { 30 | return c >= 'a' && c <= 'z'; 31 | } 32 | 33 | int 
isgraph(int c) 34 | { 35 | return c > ' ' && c <= '~'; 36 | } 37 | 38 | int isprint(int c) 39 | { 40 | return isgraph(c) || c == ' '; 41 | } 42 | 43 | int isspace(int c) 44 | { 45 | return isblank(c) || c == '\f' || c == '\n' || c == '\r' || c == '\v'; 46 | } 47 | 48 | int toupper(int c) 49 | { 50 | return islower(c) ? (c + ('A' - 'a')) : c; 51 | } 52 | /* map an uppercase letter to its lowercase counterpart; any other value is returned unchanged */ 53 | int tolower(int c) 54 | { 55 | return isupper(c) ? (c + ('a' - 'A')) : c; /* lowercase letters sit above uppercase, so the offset is 'a' - 'A' */ 56 | } 57 | -------------------------------------------------------------------------------- /src/klibc/ctype.h: -------------------------------------------------------------------------------- 1 | #ifndef CTYPE_H_ 2 | #define CTYPE_H_ 3 | 4 | int isalnum(int c); 5 | int isalpha(int c); 6 | #define isascii(c) ((unsigned int)(c) < 128) /* unsigned compare also rejects negative values */ 7 | int isblank(int c); 8 | int isdigit(int c); 9 | int isupper(int c); 10 | int islower(int c); 11 | int isprint(int c); 12 | int isspace(int c); 13 | 14 | int toupper(int c); 15 | int tolower(int c); 16 | 17 | #endif /* CTYPE_H_ */ 18 | -------------------------------------------------------------------------------- /src/klibc/errno.h: -------------------------------------------------------------------------------- 1 | #ifndef ERRNO_H_ 2 | #define ERRNO_H_ 3 | 4 | /* all kernel-definitions that should be shared with user space 5 | * are in include/kdef.h 6 | */ 7 | #include "kdef.h" 8 | 9 | #endif /* ERRNO_H_ */ 10 | -------------------------------------------------------------------------------- /src/klibc/inttypes.h: -------------------------------------------------------------------------------- 1 | #ifndef INTTYPES_H_ 2 | #define INTTYPES_H_ 3 | 4 | typedef char int8_t; 5 | typedef short int16_t; 6 | typedef int int32_t; 7 | typedef long long int64_t; 8 | 9 | typedef unsigned char uint8_t; 10 | typedef unsigned short uint16_t; 11 | typedef unsigned int uint32_t; 12 | typedef unsigned long long uint64_t; 13 | 14 | #endif /* INTTYPES_H_ */ 15 | 
-------------------------------------------------------------------------------- /src/klibc/malloc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "intr.h" 3 | #include "vm.h" 4 | #include "panic.h" 5 | 6 | #define MAGIC 0xbaadbeef 7 | 8 | struct mem_range { 9 | uint32_t start; 10 | size_t size; 11 | struct mem_range *next; 12 | }; 13 | 14 | struct alloc_desc { 15 | size_t size; 16 | uint32_t magic; 17 | }; 18 | 19 | static void add_range(struct mem_range *rng); 20 | static void coalesce(struct mem_range *low, struct mem_range *mid, struct mem_range *high); 21 | static struct mem_range *alloc_node(void); 22 | static void free_node(struct mem_range *node); 23 | 24 | struct mem_range *free_list; 25 | struct mem_range *node_pool; 26 | 27 | 28 | void *malloc(size_t sz) 29 | { 30 | void *res = 0; 31 | struct mem_range *node, *prev, dummy; 32 | int intr_state; 33 | struct alloc_desc *desc; 34 | size_t alloc_size = sz + sizeof *desc; 35 | 36 | if(!sz) { 37 | return 0; 38 | } 39 | 40 | /* entering the critical section, do not disturb */ 41 | intr_state = get_intr_state(); 42 | disable_intr(); 43 | 44 | find_range: 45 | prev = &dummy; 46 | dummy.next = node = free_list; 47 | while(node) { 48 | /* find a node in the free_list with enough space ... */ 49 | if(node->size >= alloc_size) { 50 | /* insert the allocation descriptor at the beginning */ 51 | desc = (void*)node->start; 52 | desc->size = alloc_size; 53 | desc->magic = MAGIC; 54 | res = desc + 1; /* that's what we'll return to the user */ 55 | 56 | /* modify the node to reflect the new range after we 57 | * grabbed a part at the beginning... 
58 | */ 59 | node->size -= alloc_size; 60 | node->start += alloc_size; 61 | 62 | /* if the range represented by this node now has zero size, 63 | * remove and free the node (it goes into the node_pool) 64 | */ 65 | if(!node->size) { 66 | prev->next = node->next; 67 | if(free_list == node) { 68 | free_list = node->next; 69 | } 70 | free_node(node); 71 | } 72 | break; 73 | } 74 | prev = node; 75 | node = node->next; 76 | } 77 | 78 | /* we didn't find a range big enough in the free_list. In that case 79 | * we need to allocate some pages, add them to the free_list and try 80 | * again. 81 | */ 82 | if(!res) { 83 | struct mem_range *range; 84 | int pg, pgcount = (PGSIZE - 1 + alloc_size) / PGSIZE; 85 | 86 | if((pg = pgalloc(pgcount, MEM_KERNEL)) == -1) { 87 | set_intr_state(intr_state); 88 | return 0; 89 | } 90 | 91 | range = alloc_node(); 92 | range->start = PAGE_TO_ADDR(pg); 93 | range->size = pgcount * PGSIZE; 94 | add_range(range); 95 | goto find_range; 96 | } 97 | 98 | set_intr_state(intr_state); 99 | return res; 100 | } 101 | 102 | void free(void *ptr) 103 | { 104 | int intr_state; 105 | struct alloc_desc *desc; 106 | struct mem_range *rng; 107 | 108 | if(!ptr) return; 109 | 110 | intr_state = get_intr_state(); 111 | disable_intr(); 112 | 113 | desc = (struct alloc_desc*)ptr - 1; 114 | if(desc->magic != MAGIC) { 115 | panic("free(%x) magic missmatch, invalid address.\n", (unsigned int)ptr); 116 | } 117 | 118 | rng = alloc_node(); 119 | rng->start = (uint32_t)desc; 120 | rng->size = desc->size; 121 | add_range(rng); 122 | 123 | set_intr_state(intr_state); 124 | } 125 | 126 | static void add_range(struct mem_range *rng) 127 | { 128 | struct mem_range *node, *prev = 0; 129 | 130 | if(!free_list || free_list->start > rng->start) { 131 | rng->next = free_list; 132 | free_list = rng; 133 | 134 | } else { 135 | node = free_list; 136 | 137 | while(node) { 138 | if(!node->next || node->next->start > rng->start) { 139 | rng->next = node->next; 140 | node->next = rng; 141 | 
prev = node; /* needed by coalesce after the loop */ 142 | break; 143 | } 144 | 145 | prev = node; 146 | node = node->next; 147 | } 148 | } 149 | 150 | coalesce(prev, rng, rng->next); 151 | } 152 | /* merge the freshly inserted range (mid) with its list neighbours when they touch: first absorb high into mid, then absorb mid into low. low and/or high may be null. */ 153 | static void coalesce(struct mem_range *low, struct mem_range *mid, struct mem_range *high) 154 | { 155 | if(high) { 156 | if(mid->start + mid->size >= high->start) { 157 | mid->size = high->start + high->size - mid->start; /* merged range runs from mid's start to high's end */ 158 | mid->next = high->next; 159 | free_node(high); 160 | } 161 | } 162 | 163 | if(low) { 164 | if(low->start + low->size >= mid->start) { 165 | low->size = mid->start + mid->size - low->start; /* merged range runs from low's start to mid's end */ 166 | low->next = mid->next; 167 | free_node(mid); 168 | } 169 | } 170 | } 171 | 172 | static struct mem_range *alloc_node(void) 173 | { 174 | struct mem_range *node; 175 | 176 | /* no nodes available for reuse... 177 | * grab a page, slice it into nodes, link them up and hang them in the pool 178 | */ 179 | if(!node_pool) { 180 | int i, num_nodes, pg; 181 | struct mem_range *nodepage; 182 | 183 | pg = pgalloc(1, MEM_KERNEL); 184 | if(pg == -1) { 185 | panic("failed to allocate page for the malloc node pool\n"); 186 | return 0; /* unreachable */ 187 | } 188 | 189 | nodepage = (struct mem_range*)PAGE_TO_ADDR(pg); 190 | num_nodes = PGSIZE / sizeof *nodepage; 191 | 192 | for(i=1; inext; 201 | node->next = 0; 202 | return node; 203 | } 204 | 205 | static void free_node(struct mem_range *node) 206 | { 207 | node->next = node_pool; 208 | node_pool = node; 209 | } 210 | -------------------------------------------------------------------------------- /src/klibc/stdarg.h: -------------------------------------------------------------------------------- 1 | #ifndef STDARG_H_ 2 | #define STDARG_H_ 3 | 4 | /* Assumes that arguments are passed on the stack 4-byte aligned */ 5 | 6 | typedef int* va_list; 7 | 8 | #define va_start(ap, last) ((ap) = (int*)&(last) + 1) 9 | #define va_arg(ap, type) (*(type*)(ap)++) 10 | #define va_end(ap) 11 | 12 | #endif /* STDARG_H_ */ 13 | 
-------------------------------------------------------------------------------- /src/klibc/stdio.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | static void bwrite(char *buf, size_t buf_sz, char *str, int sz); 6 | static int intern_printf(char *buf, size_t sz, const char *fmt, va_list ap); 7 | 8 | /* putchar is defined in term.c */ 9 | 10 | int puts(const char *s) 11 | { 12 | while(*s) { 13 | putchar(*s++); 14 | } 15 | putchar('\n'); 16 | return 0; 17 | } 18 | 19 | /* -- printf and friends -- */ 20 | 21 | static char *convc = "dioxXucsfeEgGpn%"; 22 | 23 | #define IS_CONV(c) strchr(convc, c) 24 | 25 | int printf(const char *fmt, ...) 26 | { 27 | int res; 28 | va_list ap; 29 | 30 | va_start(ap, fmt); 31 | res = intern_printf(0, 0, fmt, ap); 32 | va_end(ap); 33 | return res; 34 | } 35 | 36 | int vprintf(const char *fmt, va_list ap) 37 | { 38 | return intern_printf(0, 0, fmt, ap); 39 | } 40 | 41 | int sprintf(char *buf, const char *fmt, ...) 42 | { 43 | int res; 44 | va_list ap; 45 | 46 | va_start(ap, fmt); 47 | res = intern_printf(buf, 0, fmt, ap); 48 | va_end(ap); 49 | return res; 50 | } 51 | 52 | int vsprintf(char *buf, const char *fmt, va_list ap) 53 | { 54 | return intern_printf(buf, 0, fmt, ap); 55 | } 56 | 57 | int snprintf(char *buf, size_t sz, const char *fmt, ...) 58 | { 59 | int res; 60 | va_list ap; 61 | 62 | va_start(ap, fmt); 63 | res = intern_printf(buf, sz, fmt, ap); 64 | va_end(ap); 65 | return res; 66 | } 67 | 68 | int vsnprintf(char *buf, size_t sz, const char *fmt, va_list ap) 69 | { 70 | return intern_printf(buf, sz, fmt, ap); 71 | } 72 | 73 | 74 | /* intern_printf provides all the functionality needed by all the printf 75 | * variants. 76 | * - buf: optional buffer onto which the formatted results are written. If null 77 | * then the output goes to the terminal through putchar calls. This is used 78 | * by the (v)sprintf variants which write to an array of char. 
79 | * - sz: optional maximum size of the output, 0 means unlimited. This is used 80 | * by the (v)snprintf variants to avoid buffer overflows. 81 | * The rest are obvious, format string and variable argument list. 82 | */ 83 | 84 | #define BUF(x) ((x) ? (x) + cnum : (x)) 85 | #define SZ(x) ((x) ? (x) - cnum : (x)) 86 | 87 | static int intern_printf(char *buf, size_t sz, const char *fmt, va_list ap) 88 | { 89 | char conv_buf[32]; 90 | char *str; 91 | int i, slen; 92 | const char *fstart = 0; 93 | 94 | /* state */ 95 | int cnum = 0; 96 | int base = 10; 97 | int alt = 0; 98 | int fwidth = 0; 99 | int padc = ' '; 100 | int sign = 0; 101 | int left_align = 0; /* not implemented yet */ 102 | int hex_caps = 0; 103 | int unsig = 0; 104 | 105 | while(*fmt) { 106 | if(*fmt == '%') { 107 | fstart = fmt++; 108 | continue; 109 | } 110 | 111 | if(fstart) { 112 | if(IS_CONV(*fmt)) { 113 | switch(*fmt) { 114 | case 'X': 115 | hex_caps = 1; 116 | 117 | case 'x': 118 | case 'p': 119 | base = 16; 120 | 121 | if(alt) { 122 | bwrite(BUF(buf), SZ(sz), "0x", 2); 123 | } 124 | 125 | case 'u': 126 | unsig = 1; 127 | 128 | if(0) { 129 | case 'o': 130 | base = 8; 131 | 132 | if(alt) { 133 | bwrite(BUF(buf), SZ(sz), "0", 1); 134 | } 135 | } 136 | 137 | case 'd': 138 | case 'i': 139 | if(unsig) { 140 | utoa(va_arg(ap, unsigned int), conv_buf, base); 141 | } else { 142 | itoa(va_arg(ap, int), conv_buf, base); 143 | } 144 | if(hex_caps) { 145 | for(i=0; conv_buf[i]; i++) { 146 | conv_buf[i] = toupper(conv_buf[i]); 147 | } 148 | } 149 | 150 | slen = strlen(conv_buf); 151 | for(i=slen; i 5 | #include 6 | 7 | int putchar(int c); 8 | int puts(const char *s); 9 | 10 | int printf(const char *fmt, ...); 11 | int vprintf(const char *fmt, va_list ap); 12 | 13 | int sprintf(char *buf, const char *fmt, ...); 14 | int vsprintf(char *buf, const char *fmt, va_list ap); 15 | 16 | int snprintf(char *buf, size_t sz, const char *fmt, ...); 17 | int vsnprintf(char *buf, size_t sz, const char *fmt, va_list ap); 18 
| 19 | #endif /* STDIO_H_ */ 20 | -------------------------------------------------------------------------------- /src/klibc/stdlib.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int atoi(const char *str) 5 | { 6 | return strtol(str, 0, 10); 7 | } 8 | 9 | long atol(const char *str) 10 | { 11 | return strtol(str, 0, 10); 12 | } 13 | /* convert the leading numeric part of str to a long. base 0 selects 16, 8 or 10 from the leading characters. if endp is non-null it receives a pointer to the first character that was not consumed. */ 14 | long strtol(const char *str, char **endp, int base) 15 | { 16 | long acc = 0; 17 | int sign = 1; 18 | 19 | while(isspace(*str)) str++; 20 | 21 | if(base == 0) { 22 | if(str[0] == '0') { 23 | if(str[1] == 'x' || str[1] == 'X') { 24 | base = 16; 25 | } else { 26 | base = 8; 27 | } 28 | } else { 29 | base = 10; 30 | } 31 | } 32 | 33 | if(*str == '+') { 34 | str++; 35 | } else if(*str == '-') { 36 | sign = -1; 37 | str++; 38 | } 39 | 40 | while(*str) { 41 | long val; 42 | char c = tolower(*str); 43 | 44 | if(isdigit(c)) { 45 | val = *str - '0'; 46 | } else if(c >= 'a' && c <= 'f') { 47 | val = 10 + c - 'a'; 48 | } else { break; /* not a digit character at all: stop here instead of using an unset val */ } 49 | if(val >= base) { 50 | break; 51 | } 52 | 53 | acc = acc * base + val; 54 | str++; 55 | } 56 | 57 | if(endp) { 58 | *endp = (char*)str; 59 | } 60 | 61 | return sign > 0 ? acc : -acc; 62 | } 63 | 64 | void itoa(int val, char *buf, int base) 65 | { 66 | static char rbuf[16]; 67 | char *ptr = rbuf; 68 | int neg = 0; 69 | 70 | if(val < 0) { 71 | neg = 1; 72 | val = -val; 73 | } 74 | 75 | if(val == 0) { 76 | *ptr++ = '0'; 77 | } 78 | 79 | while(val) { 80 | int digit = val % base; 81 | *ptr++ = digit < 10 ? 
(digit + '0') : (digit - 10 + 'a'); 82 | val /= base; 83 | } 84 | 85 | if(neg) { 86 | *ptr++ = '-'; 87 | } 88 | 89 | ptr--; 90 | 91 | while(ptr >= rbuf) { 92 | *buf++ = *ptr--; 93 | } 94 | *buf = 0; 95 | } 96 | 97 | void utoa(unsigned int val, char *buf, int base) 98 | { 99 | static char rbuf[16]; 100 | char *ptr = rbuf; 101 | 102 | if(val == 0) { 103 | *ptr++ = '0'; 104 | } 105 | 106 | while(val) { 107 | unsigned int digit = val % base; 108 | *ptr++ = digit < 10 ? (digit + '0') : (digit - 10 + 'a'); 109 | val /= base; 110 | } 111 | 112 | ptr--; 113 | 114 | while(ptr >= rbuf) { 115 | *buf++ = *ptr--; 116 | } 117 | *buf = 0; 118 | } 119 | 120 | -------------------------------------------------------------------------------- /src/klibc/stdlib.h: -------------------------------------------------------------------------------- 1 | #ifndef STDLIB_H_ 2 | #define STDLIB_H_ 3 | 4 | #include 5 | 6 | typedef int32_t ssize_t; 7 | typedef uint32_t size_t; 8 | 9 | int atoi(const char *str); 10 | long atol(const char *str); 11 | long strtol(const char *str, char **endp, int base); 12 | 13 | void itoa(int val, char *buf, int base); 14 | void utoa(unsigned int val, char *buf, int base); 15 | 16 | /* defined in malloc.c */ 17 | void *malloc(size_t sz); 18 | void free(void *ptr); 19 | 20 | #endif /* STDLIB_H_ */ 21 | -------------------------------------------------------------------------------- /src/klibc/string.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void memset(void *s, int c, size_t n) 4 | { 5 | char *ptr = s; 6 | while(n--) { 7 | *ptr++ = c; 8 | } 9 | } 10 | 11 | /* Does the same thing as memset only with 16bit values. 12 | * n in this case is the number of values, not the number of bytes. 
13 | */ 14 | void memset16(void *s, int c, size_t n) 15 | { 16 | short *ptr = s; 17 | while(n--) { 18 | *ptr++ = c; 19 | } 20 | } 21 | 22 | void *memcpy(void *dest, const void *src, size_t n) 23 | { 24 | char *dptr = dest; 25 | const char *sptr = src; 26 | 27 | while(n--) { 28 | *dptr++ = *sptr++; 29 | } 30 | return dest; 31 | } 32 | 33 | void *memmove(void *dest, const void *src, size_t n) 34 | { 35 | int i; 36 | char *dptr; 37 | const char *sptr; 38 | 39 | if(dest <= src) { 40 | /* forward copy */ 41 | dptr = dest; 42 | sptr = src; 43 | for(i=0; i= s) { 85 | if(*ptr == c) { 86 | return (char*)ptr; 87 | } 88 | } 89 | return 0; 90 | } 91 | 92 | char *strstr(const char *str, const char *substr) 93 | { 94 | while(*str) { 95 | const char *s1 = str; 96 | const char *s2 = substr; 97 | 98 | while(*s1 && *s1 == *s2) { 99 | s1++; 100 | s2++; 101 | } 102 | if(!*s2) { 103 | return (char*)str; 104 | } 105 | str++; 106 | } 107 | return 0; 108 | } 109 | 110 | int strcmp(const char *s1, const char *s2) 111 | { 112 | while(*s1 && *s1 == *s2) { 113 | s1++; 114 | s2++; 115 | } 116 | return *s1 - *s2; 117 | } 118 | -------------------------------------------------------------------------------- /src/klibc/string.h: -------------------------------------------------------------------------------- 1 | #ifndef STRING_H_ 2 | #define STRING_H_ 3 | 4 | #include 5 | 6 | void memset(void *s, int c, size_t n); 7 | void memset16(void *s, int c, size_t n); 8 | 9 | void *memcpy(void *dest, const void *src, size_t n); 10 | void *memmove(void *dest, const void *src, size_t n); 11 | 12 | size_t strlen(const char *s); 13 | 14 | char *strchr(const char *s, int c); 15 | char *strrchr(const char *s, int c); 16 | 17 | char *strstr(const char *str, const char *substr); 18 | 19 | int strcmp(const char *s1, const char *s2); 20 | 21 | #endif /* STRING_H_ */ 22 | -------------------------------------------------------------------------------- /src/klibc/time.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include "time.h" 3 | #include "rtc.h" 4 | #include "timer.h" 5 | #include "config.h" 6 | 7 | #define MINSEC 60 8 | #define HOURSEC (60 * MINSEC) 9 | #define DAYSEC (24 * HOURSEC) 10 | #define YEARDAYS(x) (is_leap_year(x) ? 366 : 365) 11 | 12 | /* 1-1-1970 was a thursday */ 13 | #define EPOCH_WDAY 4 14 | 15 | static int is_leap_year(int yr); 16 | 17 | static int mdays[2][12] = { 18 | {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, 19 | {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31} 20 | }; 21 | 22 | static char *wday[] = { 23 | "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat" 24 | }; 25 | static char *mon[] = { 26 | "Jan", "Feb", "Mar", "Apr", "May", "Jun", 27 | "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" 28 | }; 29 | 30 | 31 | time_t time(time_t *tp) 32 | { 33 | time_t res = start_time + nticks / TICK_FREQ_HZ; 34 | 35 | if(tp) *tp = res; 36 | return res; 37 | } 38 | 39 | char *asctime(struct tm *tm) 40 | { 41 | static char buf[64]; 42 | return asctime_r(tm, buf); 43 | } 44 | 45 | char *asctime_r(struct tm *tm, char *buf) 46 | { 47 | sprintf(buf, "%s %s %d %02d:%02d:%02d %d\n", wday[tm->tm_wday], 48 | mon[tm->tm_mon], tm->tm_mday, tm->tm_hour, tm->tm_min, 49 | tm->tm_sec, tm->tm_year + 1900); 50 | return buf; 51 | } 52 | 53 | time_t mktime(struct tm *tm) 54 | { 55 | int i, num_years = tm->tm_year - 70; 56 | int year = 1970; 57 | int days = day_of_year(tm->tm_year + 1900, tm->tm_mon, tm->tm_mday - 1); 58 | 59 | /* set correct yearday */ 60 | tm->tm_yday = days; 61 | 62 | for(i=0; itm_wday = (days + EPOCH_WDAY) % 7; 68 | 69 | return (time_t)days * DAYSEC + tm->tm_hour * HOURSEC + 70 | tm->tm_min * MINSEC + tm->tm_sec; 71 | } 72 | 73 | struct tm *gmtime(time_t *tp) 74 | { 75 | static struct tm tm; 76 | return gmtime_r(tp, &tm); 77 | } 78 | 79 | struct tm *gmtime_r(time_t *tp, struct tm *tm) 80 | { 81 | int year, days, leap, yrdays; 82 | time_t t; 83 | 84 | year = 1970; 85 
| days = *tp / DAYSEC; 86 | t = *tp % DAYSEC; 87 | 88 | tm->tm_wday = (days + EPOCH_WDAY) % 7; 89 | 90 | while(days >= (yrdays = YEARDAYS(year))) { 91 | days -= yrdays; 92 | year++; 93 | } 94 | tm->tm_year = year - 1900; 95 | tm->tm_yday = days; 96 | 97 | leap = is_leap_year(year); 98 | tm->tm_mon = 0; 99 | while(days >= mdays[leap][tm->tm_mon]) { 100 | days -= mdays[leap][tm->tm_mon++]; 101 | } 102 | 103 | tm->tm_mday = days + 1; 104 | 105 | tm->tm_hour = t / HOURSEC; 106 | t %= HOURSEC; 107 | tm->tm_min = t / MINSEC; 108 | tm->tm_sec = t % MINSEC; 109 | return tm; 110 | } 111 | 112 | int day_of_year(int year, int mon, int day) 113 | { 114 | int i, yday, leap; 115 | 116 | leap = is_leap_year(year) ? 1 : 0; 117 | yday = day; 118 | 119 | for(i=0; i 2 | #include "mboot.h" 3 | #include "vid.h" 4 | #include "term.h" 5 | #include "asmops.h" 6 | #include "segm.h" 7 | #include "intr.h" 8 | #include "ata.h" 9 | #include "fs.h" 10 | #include "rtc.h" 11 | #include "timer.h" 12 | #include "mem.h" 13 | #include "vm.h" 14 | #include "proc.h" 15 | 16 | 17 | void kmain(struct mboot_info *mbinf) 18 | { 19 | clear_scr(); 20 | 21 | /* pointless verbal diarrhea */ 22 | if(mbinf->flags & MB_LDRNAME) { 23 | printf("loaded by: %s\n", mbinf->boot_loader_name); 24 | } 25 | if(mbinf->flags & MB_CMDLINE) { 26 | printf("kernel command line: %s\n", mbinf->cmdline); 27 | } 28 | 29 | puts("kernel starting up"); 30 | 31 | init_segm(); 32 | init_intr(); 33 | 34 | 35 | /* initialize the physical memory manager */ 36 | init_mem(mbinf); 37 | /* initialize paging and the virtual memory manager */ 38 | init_vm(); 39 | 40 | /* initialize ATA disks */ 41 | init_ata(); 42 | /* initialize the filesystem */ 43 | /*init_fs();*/ 44 | 45 | /* initialize the timer and RTC */ 46 | init_timer(); 47 | init_rtc(); 48 | 49 | /* create the first process and switch to it */ 50 | /*init_proc();*/ 51 | 52 | /* XXX unreachable */ 53 | 54 | for(;;) { 55 | halt_cpu(); 56 | } 57 | } 58 | 
-------------------------------------------------------------------------------- /src/mem.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "mem.h" 4 | #include "panic.h" 5 | #include "vm.h" 6 | #include "intr.h" 7 | 8 | #define FREE 0 9 | #define USED 1 10 | 11 | #define BM_IDX(pg) ((pg) / 32) 12 | #define BM_BIT(pg) ((pg) & 0x1f) 13 | 14 | #define IS_FREE(pg) ((bitmap[BM_IDX(pg)] & (1 << BM_BIT(pg))) == 0) 15 | 16 | static void mark_page(int pg, int free); 17 | static void add_memory(uint32_t start, size_t size); 18 | 19 | /* end of kernel image */ 20 | extern int _end; 21 | 22 | /* A bitmap is used to track which physical memory pages are used or available 23 | * for allocation by alloc_phys_page. 24 | * 25 | * last_alloc_idx keeps track of the last 32bit element in the bitmap array 26 | * where a free page was found. It's guaranteed that all the elements before 27 | * this have no free pages, but it doesn't imply that there will be another 28 | * free page there. So it's used as a starting point for the search. 29 | */ 30 | static uint32_t *bitmap; 31 | static int bmsize, last_alloc_idx; 32 | 33 | 34 | void init_mem(struct mboot_info *mb) 35 | { 36 | int i, num_pages, max_pg = 0; 37 | uint32_t used_end; 38 | 39 | num_pages = 0; 40 | last_alloc_idx = 0; 41 | 42 | /* the allocation bitmap starts right at the end of the ELF image */ 43 | bitmap = (uint32_t*)&_end; 44 | 45 | /* start by marking all posible pages (2**20) as used. We do not "reserve" 46 | * all this space. Pages beyond the end of the useful bitmap area 47 | * ((char*)bitmap + bmsize), which will be determined after we traverse the 48 | * memory map, are going to be marked as available for allocation. 49 | */ 50 | memset(bitmap, 0xff, 1024 * 1024 / 8); 51 | 52 | /* if the bootloader gave us an available memory map, traverse it and mark 53 | * all the corresponding pages as free. 
54 | */ 55 | if(mb->flags & MB_MMAP) { 56 | struct mboot_mmap *mem, *mmap_end; 57 | 58 | mem = mb->mmap; 59 | mmap_end = (struct mboot_mmap*)((char*)mb->mmap + mb->mmap_len); 60 | 61 | printf("memory map:\n"); 62 | while(mem < mmap_end) { 63 | /* ignore memory ranges that start beyond the 4gb mark */ 64 | if(mem->base_high == 0 && mem->base_low != 0xffffffff) { 65 | char *type; 66 | unsigned int end, rest = 0xffffffff - mem->base_low; 67 | 68 | /* make sure the length does not extend beyond 4gb */ 69 | if(mem->length_high || mem->length_low > rest) { 70 | mem->length_low = rest; 71 | } 72 | end = mem->base_low + mem->length_low; 73 | 74 | if(mem->type == MB_MEM_VALID) { 75 | type = "free:"; 76 | add_memory(mem->base_low, mem->length_low); 77 | 78 | num_pages = ADDR_TO_PAGE(mem->base_low + mem->length_low); 79 | if(max_pg < num_pages) { 80 | max_pg = num_pages; 81 | } 82 | } else { 83 | type = "hole:"; 84 | } 85 | 86 | printf(" %s %x - %x (%u bytes)\n", type, mem->base_low, end, mem->length_low); 87 | } 88 | mem = (struct mboot_mmap*)((char*)mem + mem->skip + sizeof mem->skip); 89 | } 90 | } else if(mb->flags & MB_MEM) { 91 | /* if we don't have a detailed memory map, just use the lower and upper 92 | * memory block sizes to determine which pages should be available. 93 | */ 94 | add_memory(0, mb->mem_lower); 95 | add_memory(0x100000, mb->mem_upper * 1024); 96 | max_pg = mb->mem_upper / 4; 97 | 98 | printf("lower memory: %ukb, upper mem: %ukb\n", mb->mem_lower, mb->mem_upper); 99 | } else { 100 | /* I don't think this should ever happen with a multiboot-compliant boot loader */ 101 | panic("didn't get any memory info from the boot loader, I give up\n"); 102 | } 103 | 104 | bmsize = max_pg / 8; /* size of the useful bitmap in bytes */ 105 | 106 | /* mark all the used pages as ... well ... 
used */ 107 | used_end = ((uint32_t)bitmap + bmsize - 1); 108 | 109 | printf("marking pages up to %x ", used_end); 110 | used_end = ADDR_TO_PAGE(used_end); 111 | printf("(page: %d) inclusive as used\n", used_end); 112 | 113 | for(i=0; i<=used_end; i++) { 114 | mark_page(i, USED); 115 | } 116 | } 117 | 118 | /* alloc_phys_page finds the first available page of physical memory, 119 | * marks it as used in the bitmap, and returns its address. If there's 120 | * no unused physical page, 0 is returned. 121 | */ 122 | uint32_t alloc_phys_page(void) 123 | { 124 | int i, idx, max, intr_state; 125 | 126 | intr_state = get_intr_state(); 127 | disable_intr(); 128 | 129 | idx = last_alloc_idx; 130 | max = bmsize / 4; 131 | 132 | while(idx <= max) { 133 | /* if at least one bit is 0 then we have at least 134 | * one free page. find it and allocate it. 135 | */ 136 | if(bitmap[idx] != 0xffffffff) { 137 | for(i=0; i<32; i++) { 138 | int pg = idx * 32 + i; 139 | 140 | if(IS_FREE(pg)) { 141 | mark_page(pg, USED); 142 | 143 | last_alloc_idx = idx; 144 | 145 | /*printf("alloc_phys_page() -> %x (page: %d)\n", PAGE_TO_ADDR(pg), pg);*/ 146 | 147 | set_intr_state(intr_state); 148 | return PAGE_TO_ADDR(pg); 149 | } 150 | } 151 | panic("can't happen: alloc_phys_page (mem.c)\n"); 152 | } 153 | idx++; 154 | } 155 | 156 | set_intr_state(intr_state); 157 | return 0; 158 | } 159 | 160 | /* free_phys_page marks the physical page which corresponds to the specified 161 | * address as free in the allocation bitmap. 162 | * 163 | * CAUTION: no checks are done that this page should actually be freed or not. 164 | * If you call free_phys_page with the address of some part of memory that was 165 | * originally reserved due to it being in a memory hole or part of the kernel 166 | * image or whatever, it will be subsequently allocatable by alloc_phys_page. 
167 | */ 168 | void free_phys_page(uint32_t addr) 169 | { 170 | int pg = ADDR_TO_PAGE(addr); 171 | int bmidx = BM_IDX(pg); 172 | 173 | int intr_state = get_intr_state(); 174 | disable_intr(); 175 | 176 | if(IS_FREE(pg)) { 177 | panic("free_phys_page(%d): I thought that was already free!\n", pg); 178 | } 179 | 180 | mark_page(pg, FREE); 181 | if(bmidx < last_alloc_idx) { 182 | last_alloc_idx = bmidx; 183 | } 184 | 185 | set_intr_state(intr_state); 186 | } 187 | 188 | /* this is only ever used by the VM init code to find out what the extends of 189 | * the kernel image are, in order to map them 1-1 before enabling paging. 190 | */ 191 | void get_kernel_mem_range(uint32_t *start, uint32_t *end) 192 | { 193 | if(start) { 194 | *start = 0x100000; 195 | } 196 | if(end) { 197 | uint32_t e = (uint32_t)bitmap + bmsize; 198 | 199 | if(e & PGOFFS_MASK) { 200 | *end = (e + 4096) & ~PGOFFS_MASK; 201 | } else { 202 | *end = e; 203 | } 204 | } 205 | } 206 | 207 | /* adds a range of physical memory to the available pool. used during init_mem 208 | * when traversing the memory map. 209 | */ 210 | static void add_memory(uint32_t start, size_t sz) 211 | { 212 | int i, szpg, pg; 213 | 214 | szpg = ADDR_TO_PAGE(sz); 215 | pg = ADDR_TO_PAGE(start); 216 | 217 | for(i=0; i 2 | #include "mutex.h" 3 | #include "sched.h" 4 | #include "intr.h" 5 | 6 | void mutex_lock(mutex_t *m) 7 | { 8 | int istate = get_intr_state(); 9 | disable_intr(); 10 | 11 | /* sleep while the mutex is held */ 12 | while(*m > 0) { 13 | wait(m); 14 | } 15 | /* then grab it... 
*/ 16 | (*m)++; 17 | 18 | set_intr_state(istate); 19 | } 20 | 21 | void mutex_unlock(mutex_t *m) 22 | { 23 | int istate = get_intr_state(); 24 | disable_intr(); 25 | 26 | assert(*m); 27 | /* release the mutex and wakeup everyone waiting on it */ 28 | (*m)--; 29 | wakeup(m); 30 | 31 | set_intr_state(istate); 32 | } 33 | 34 | int mutex_trylock(mutex_t *m) 35 | { 36 | int res = -1, istate = get_intr_state(); 37 | disable_intr(); 38 | 39 | if(*m == 0) { 40 | (*m)++; 41 | res = 0; 42 | } 43 | set_intr_state(istate); 44 | return res; 45 | } 46 | -------------------------------------------------------------------------------- /src/mutex.h: -------------------------------------------------------------------------------- 1 | #ifndef MUTEX_H_ 2 | #define MUTEX_H_ 3 | 4 | typedef unsigned int mutex_t; 5 | 6 | void mutex_lock(mutex_t *m); 7 | void mutex_unlock(mutex_t *m); 8 | 9 | int mutex_trylock(mutex_t *m); 10 | 11 | #endif /* MUTEX_H_ */ 12 | -------------------------------------------------------------------------------- /src/panic.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "asmops.h" 5 | #include "proc.h" 6 | 7 | struct all_registers { 8 | uint32_t eax, ebx, ecx, edx; 9 | uint32_t esp, ebp, esi, edi; 10 | uint32_t eflags; 11 | uint32_t cs, ss, ds, es, fs, gs; 12 | uint32_t cr0, cr1, cr2, cr3; 13 | }; 14 | 15 | /* defined in regs.S */ 16 | void get_regs(struct all_registers *regs); 17 | 18 | void panic(const char *fmt, ...) 
19 | { 20 | va_list ap; 21 | struct all_registers regs; 22 | uint32_t eip; 23 | 24 | disable_intr(); 25 | 26 | memset(®s, 0, sizeof regs); 27 | get_regs(®s); 28 | 29 | eip = get_caller_instr_ptr(); 30 | 31 | printf("~~~~~ kernel panic ~~~~~\n"); 32 | va_start(ap, fmt); 33 | vprintf(fmt, ap); 34 | va_end(ap); 35 | 36 | printf("\nRegisters:\n"); 37 | printf("eax: %x, ebx: %x, ecx: %x, edx: %x\n", regs.eax, regs.ebx, regs.ecx, regs.edx); 38 | printf("esp: %x, ebp: %x, esi: %x, edi: %x\n", regs.esp, regs.ebp, regs.esi, regs.edi); 39 | printf("eip: %x, eflags: %x\n", eip, regs.eflags); 40 | printf("cr0: %x, cr1: %x, cr2: %x, cr3: %x\n", regs.cr0, regs.cr1, regs.cr2, regs.cr3); 41 | printf("cs: %x (%d|%d)\n", regs.cs, regs.cs >> 3, regs.cs & 3); 42 | printf("ss: %x (%d|%d)\n", regs.ss, regs.ss >> 3, regs.ss & 3); 43 | printf("ds: %x (%d|%d)\n", regs.ds, regs.ds >> 3, regs.ds & 3); 44 | printf("es: %x (%d|%d)\n", regs.es, regs.es >> 3, regs.es & 3); 45 | printf("fs: %x (%d|%d)\n", regs.fs, regs.fs >> 3, regs.fs & 3); 46 | printf("gs: %x (%d|%d)\n", regs.gs, regs.gs >> 3, regs.gs & 3); 47 | 48 | halt_cpu(); 49 | } 50 | -------------------------------------------------------------------------------- /src/panic.h: -------------------------------------------------------------------------------- 1 | #ifndef PANIC_H_ 2 | #define PANIC_H_ 3 | 4 | void panic(const char *fmt, ...); 5 | 6 | #endif /* PANIC_H_ */ 7 | -------------------------------------------------------------------------------- /src/part.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "part.h" 5 | #include "ata.h" 6 | 7 | #define PTYPE_EXT 0x5 8 | #define PTYPE_EXT_LBA 0xf 9 | 10 | #define PATTR_ACT_BIT (1 << 9) 11 | #define PATTR_PRIM_BIT (1 << 10) 12 | 13 | #define PTYPE(attr) ((attr) & 0xff) 14 | #define IS_ACT(attr) ((attr) & PATTR_ACT_BIT) 15 | #define IS_PRIM(attr) ((attr) & PATTR_PRIM_BIT) 16 | 17 | #define BOOTSIG_OFFS 
510 18 | #define PTABLE_OFFS 0x1be 19 | 20 | #define BOOTSIG 0xaa55 21 | 22 | #define IS_MBR (sidx == 0) 23 | #define IS_FIRST_EBR (!IS_MBR && (first_ebr_offs == 0)) 24 | 25 | struct part_record { 26 | uint8_t stat; 27 | uint8_t first_head, first_cyl, first_sect; 28 | uint8_t type; 29 | uint8_t last_head, last_cyl, last_sect; 30 | uint32_t first_lba; 31 | uint32_t nsect_lba; 32 | } __attribute__((packed)); 33 | 34 | 35 | static uint16_t bootsig(const char *sect); 36 | 37 | 38 | struct partition *get_part_list(int devno) 39 | { 40 | char *sect; 41 | struct partition *phead = 0, *ptail = 0; 42 | uint32_t sidx = 0; 43 | uint32_t first_ebr_offs = 0; 44 | int i, num_bootrec = 0; 45 | 46 | sect = malloc(512); 47 | assert(sect); 48 | 49 | do { 50 | int num_rec; 51 | struct part_record *prec; 52 | 53 | if(IS_FIRST_EBR) { 54 | first_ebr_offs = sidx; 55 | } 56 | 57 | if(ata_read_pio(devno, sidx, sect) == -1) { 58 | goto err; 59 | } 60 | if(bootsig(sect) != BOOTSIG) { 61 | printf("invalid/corrupted partition table, sector %lu has no magic\n", (unsigned long)sidx); 62 | goto err; 63 | } 64 | prec = (struct part_record*)(sect + PTABLE_OFFS); 65 | 66 | /* MBR has 4 records, EBRs have 2 */ 67 | num_rec = IS_MBR ? 4 : 2; 68 | 69 | for(i=0; i 0) { 75 | sidx = 0; 76 | break; 77 | } 78 | continue; 79 | } 80 | 81 | /* ignore extended partitions and setup sector index to read 82 | * the next logical partition afterwards. 
83 | */ 84 | if(prec[i].type == PTYPE_EXT || prec[i].type == PTYPE_EXT_LBA) { 85 | /* all EBR start fields are relative to the first EBR offset */ 86 | sidx = first_ebr_offs + prec[i].first_lba; 87 | continue; 88 | } 89 | 90 | pnode = malloc(sizeof *pnode); 91 | assert(pnode); 92 | 93 | pnode->attr = prec[i].type; 94 | 95 | if(prec[i].stat & 0x80) { 96 | pnode->attr |= PATTR_ACT_BIT; 97 | } 98 | if(IS_MBR) { 99 | pnode->attr |= PATTR_PRIM_BIT; 100 | } 101 | pnode->start_sect = prec[i].first_lba + first_ebr_offs; 102 | pnode->size_sect = prec[i].nsect_lba; 103 | pnode->next = 0; 104 | 105 | /* append to the list */ 106 | if(!phead) { 107 | phead = ptail = pnode; 108 | } else { 109 | ptail->next = pnode; 110 | ptail = pnode; 111 | } 112 | } 113 | 114 | num_bootrec++; 115 | } while(sidx > 0); 116 | 117 | free(sect); 118 | return phead; 119 | 120 | err: 121 | free(sect); 122 | while(phead) { 123 | void *tmp = phead; 124 | phead = phead->next; 125 | free(tmp); 126 | } 127 | return 0; 128 | } 129 | 130 | void free_part_list(struct partition *plist) 131 | { 132 | while(plist) { 133 | struct partition *tmp = plist; 134 | plist = plist->next; 135 | free(tmp); 136 | } 137 | } 138 | 139 | int get_part_type(struct partition *p) 140 | { 141 | return PTYPE(p->attr); 142 | } 143 | 144 | 145 | static uint16_t bootsig(const char *sect) 146 | { 147 | return *(uint16_t*)(sect + BOOTSIG_OFFS); 148 | } 149 | 150 | -------------------------------------------------------------------------------- /src/part.h: -------------------------------------------------------------------------------- 1 | #ifndef PART_H_ 2 | #define PART_H_ 3 | 4 | #include 5 | 6 | struct partition { 7 | uint32_t start_sect; 8 | size_t size_sect; 9 | 10 | unsigned int attr; 11 | 12 | struct partition *next; 13 | }; 14 | 15 | struct partition *get_part_list(int devno); 16 | void free_part_list(struct partition *plist); 17 | 18 | int get_part_type(struct partition *p); 19 | 20 | #endif /* PART_H_ */ 21 | 
-------------------------------------------------------------------------------- /src/proc-asm.S: -------------------------------------------------------------------------------- 1 | .text 2 | /* switch_stack(uint32_t new_stack, uint32_t *old_stack_ptr) 3 | * switches to the new stack and returns the old stack pointer, which is 4 | * also copied to the address passed as the second argument. 5 | */ 6 | .globl switch_stack 7 | switch_stack: 8 | movl %esp, %eax /* old stack in eax */ 9 | movl 8(%esp), %edx 10 | cmpl $0, %edx /* if old_stack_ptr is null, skip ahead */ 11 | jz oldp_is_null 12 | movl %eax, (%edx) /* otherwise *old_stack_ptr = eax */ 13 | oldp_is_null: 14 | movl 4(%esp), %esp /* set the new stack */ 15 | ret 16 | 17 | /* get_instr_stack_ptr(uint32_t *eip, uint32_t *esp) 18 | * returns the current instruction and stack pointers at the same 19 | * point in execution, so that a newly-forked process with these 20 | * values will just return from this function and continue on. 21 | */ 22 | .globl get_instr_stack_ptr 23 | get_instr_stack_ptr: 24 | call get_instr_ptr 25 | movl %eax, 4(%esp) 26 | movl %esp, 8(%esp) 27 | ret 28 | 29 | /* get_instr_ptr(void) 30 | * returns the address of the next instruction after the call to this function 31 | */ 32 | .globl get_instr_ptr 33 | get_instr_ptr: 34 | movl (%esp), %eax 35 | ret 36 | 37 | /* get_caller_instr_ptr(void) 38 | * returns the address of the next instruction after the call to the function that 39 | * called this function. 40 | * NOTE: will only work properly when called from a function that uses ebp to point 41 | * to its stack frame, which means all of the C functions but pretty much none of 42 | * our assembly functions. 43 | */ 44 | .globl get_caller_instr_ptr 45 | get_caller_instr_ptr: 46 | movl 4(%ebp), %eax 47 | ret 48 | 49 | /* this is where we end up when we first context_switch to a newly forked 50 | * process. 
The interrupt frame is already there, so we just call intr_ret 51 | * to return to user space 52 | */ 53 | .globl just_forked 54 | just_forked: 55 | call intr_ret 56 | -------------------------------------------------------------------------------- /src/proc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "config.h" 6 | #include "proc.h" 7 | #include "tss.h" 8 | #include "vm.h" 9 | #include "segm.h" 10 | #include "intr.h" 11 | #include "panic.h" 12 | #include "syscall.h" 13 | #include "sched.h" 14 | #include "tss.h" 15 | #include "kdef.h" 16 | 17 | #define FLAGS_INTR_BIT (1 << 9) 18 | 19 | static void start_first_proc(void); 20 | 21 | /* defined in proc-asm.S */ 22 | uint32_t switch_stack(uint32_t new_stack, uint32_t *old_stack); 23 | void just_forked(void); 24 | 25 | /* defined in test_proc.S */ 26 | void test_proc(void); 27 | void test_proc_end(void); 28 | 29 | static struct process proc[MAX_PROC]; 30 | 31 | /* cur_pid: pid of the currently executing process. 32 | * when we're in the idle process cur_pid will be 0. 33 | * last_pid: pid of the last real process that was running, this should 34 | * never become 0. Essentially this defines the active kernel stack. 
35 | */ 36 | static int cur_pid, last_pid; 37 | 38 | static struct task_state *tss; 39 | 40 | 41 | void init_proc(void) 42 | { 43 | int tss_page; 44 | 45 | /* allocate a page for the task state segment, to make sure 46 | * it doesn't cross page boundaries 47 | */ 48 | if((tss_page = pgalloc(1, MEM_KERNEL)) == -1) { 49 | panic("failed to allocate memory for the task state segment\n"); 50 | } 51 | tss = (struct task_state*)PAGE_TO_ADDR(tss_page); 52 | 53 | /* the kernel stack segment never changes so we might as well set it now 54 | * the only other thing that we use in the tss is the kernel stack pointer 55 | * which is different for each process, and thus managed by context_switch 56 | */ 57 | memset(tss, 0, sizeof *tss); 58 | tss->ss0 = selector(SEGM_KDATA, 0); 59 | 60 | set_tss((uint32_t)tss); 61 | 62 | /* initialize system call handler (see syscall.c) */ 63 | init_syscall(); 64 | 65 | start_first_proc(); /* XXX never returns */ 66 | } 67 | 68 | static void start_first_proc(void) 69 | { 70 | struct process *p; 71 | int proc_size_pg, img_start_pg, stack_pg; 72 | uint32_t img_start_addr; 73 | struct intr_frame ifrm; 74 | 75 | /* prepare the first process */ 76 | p = proc + 1; 77 | p->id = 1; 78 | p->parent = 0; /* no parent for init */ 79 | 80 | p->umask = 022; 81 | 82 | p->ticks_left = TIMESLICE_TICKS; 83 | p->next = p->prev = 0; 84 | 85 | /* the first process may keep this existing page table */ 86 | p->ctx.pgtbl_paddr = get_pgdir_addr(); 87 | 88 | /* allocate a chunk of memory for the process image 89 | * and copy the code of test_proc there. 
90 | */ 91 | proc_size_pg = (test_proc_end - test_proc) / PGSIZE + 1; 92 | if((img_start_pg = pgalloc(proc_size_pg, MEM_USER)) == -1) { 93 | panic("failed to allocate space for the init process image\n"); 94 | } 95 | img_start_addr = PAGE_TO_ADDR(img_start_pg); 96 | memcpy((void*)img_start_addr, test_proc, proc_size_pg * PGSIZE); 97 | printf("copied init process at: %x\n", img_start_addr); 98 | 99 | /* allocate the first page of the user stack */ 100 | stack_pg = ADDR_TO_PAGE(KMEM_START) - 1; 101 | if(pgalloc_vrange(stack_pg, 1) == -1) { 102 | panic("failed to allocate user stack page\n"); 103 | } 104 | p->user_stack_pg = stack_pg; 105 | 106 | /* allocate a kernel stack for this process */ 107 | if((p->kern_stack_pg = pgalloc(KERN_STACK_SIZE / PGSIZE, MEM_KERNEL)) == -1) { 108 | panic("failed to allocate kernel stack for the init process\n"); 109 | } 110 | /* when switching from user space to kernel space, the ss0:esp0 from TSS 111 | * will be used to switch to the per-process kernel stack, so we need to 112 | * set it correctly before switching to user space. 113 | * tss->ss0 is already set in init_proc above. 
114 | */ 115 | tss->esp0 = PAGE_TO_ADDR(p->kern_stack_pg) + KERN_STACK_SIZE; 116 | 117 | 118 | /* now we need to fill in the fake interrupt stack frame */ 119 | memset(&ifrm, 0, sizeof ifrm); 120 | /* after the priviledge switch, this ss:esp will be used in userspace */ 121 | ifrm.esp = PAGE_TO_ADDR(stack_pg) + PGSIZE; 122 | ifrm.ss = selector(SEGM_UDATA, 3); 123 | /* instruction pointer at the beginning of the process image */ 124 | ifrm.eip = img_start_addr; 125 | ifrm.cs = selector(SEGM_UCODE, 3); 126 | /* make sure the user will run with interrupts enabled */ 127 | ifrm.eflags = FLAGS_INTR_BIT; 128 | /* user data selectors should all be the same */ 129 | ifrm.ds = ifrm.es = ifrm.fs = ifrm.gs = ifrm.ss; 130 | 131 | /* add it to the scheduler queues */ 132 | add_proc(p->id); 133 | 134 | /* make it current */ 135 | set_current_pid(p->id); 136 | 137 | /* build the current vm map */ 138 | cons_vmmap(&p->vmmap); 139 | 140 | /* execute a fake return from interrupt with the fake stack frame */ 141 | intr_ret(ifrm); 142 | } 143 | 144 | int sys_fork(void) 145 | { 146 | int i, pid; 147 | struct process *p, *parent; 148 | 149 | disable_intr(); 150 | 151 | /* find a free process slot */ 152 | /* TODO don't search up to MAX_PROC if uid != 0 */ 153 | pid = -1; 154 | for(i=1; ifiles, parent->files, sizeof p->files); 172 | 173 | p->umask = parent->umask; 174 | 175 | /* allocate a kernel stack for the new process */ 176 | if((p->kern_stack_pg = pgalloc(KERN_STACK_SIZE / PGSIZE, MEM_KERNEL)) == -1) { 177 | return -EAGAIN; 178 | } 179 | p->ctx.stack_ptr = PAGE_TO_ADDR(p->kern_stack_pg) + KERN_STACK_SIZE; 180 | /* we need to copy the current interrupt frame to the new kernel stack so 181 | * that the new process will return to the same point as the parent, just 182 | * after the fork syscall. 
183 | */ 184 | p->ctx.stack_ptr -= sizeof(struct intr_frame); 185 | memcpy((void*)p->ctx.stack_ptr, get_intr_frame(), sizeof(struct intr_frame)); 186 | /* child's return from fork returns 0 */ 187 | ((struct intr_frame*)p->ctx.stack_ptr)->regs.eax = 0; 188 | 189 | /* we also need the address of just_forked in the stack, so that switch_stacks 190 | * called from context_switch, will return to just_forked when we first switch 191 | * to a newly forked process. just_forked then just calls intr_ret to return to 192 | * userspace with the already constructed interrupt frame (see above). 193 | */ 194 | p->ctx.stack_ptr -= 4; 195 | *(uint32_t*)p->ctx.stack_ptr = (uint32_t)just_forked; 196 | 197 | /* initialize the rest of the process structure */ 198 | p->id = pid; 199 | p->parent = parent->id; 200 | p->child_list = 0; 201 | p->next = p->prev = 0; 202 | 203 | /* add to the child list */ 204 | p->sib_next = parent->child_list; 205 | parent->child_list = p; 206 | 207 | /* will be copied on write */ 208 | p->user_stack_pg = parent->user_stack_pg; 209 | 210 | /* clone the parent's virtual memory */ 211 | clone_vm(p, parent, CLONE_COW); 212 | 213 | /* done, now let's add it to the scheduler runqueue */ 214 | add_proc(p->id); 215 | 216 | return pid; 217 | } 218 | 219 | int sys_exit(int status) 220 | { 221 | struct process *p, *child; 222 | 223 | p = get_current_proc(); 224 | 225 | printf("process %d exit(%d)\n", p->id, status); 226 | 227 | /* TODO deliver SIGCHLD to the parent */ 228 | 229 | /* find any child processes and make init adopt them */ 230 | child = p->child_list; 231 | while(child) { 232 | child->parent = 1; 233 | child = child->sib_next; 234 | } 235 | 236 | cleanup_vm(p); 237 | 238 | /* remove it from the runqueue */ 239 | remove_proc(p->id); 240 | 241 | /* make it a zombie until its parent reaps it */ 242 | p->state = STATE_ZOMBIE; 243 | p->exit_status = (status & _WSTATUS_MASK) | (_WREASON_EXITED << _WREASON_SHIFT); 244 | 245 | /* wakeup any processes waiting for 
it 246 | * we're waking up the parent's address, because waitpid waits 247 | * on it's own process struct, not knowing which child will die 248 | * first. 249 | */ 250 | wakeup(get_process(p->parent)); 251 | return 0; 252 | } 253 | 254 | int sys_waitpid(int pid, int *status, int opt) 255 | { 256 | struct process *p, *child; 257 | 258 | p = get_current_proc(); 259 | 260 | restart: 261 | if(pid <= 0) { 262 | /* search for zombie children */ 263 | child = p->child_list; 264 | while(child) { 265 | if(child->state == STATE_ZOMBIE) { 266 | break; 267 | } 268 | child = child->sib_next; 269 | } 270 | } else { 271 | if(!(child = get_process(pid)) || child->parent != p->id) { 272 | return -ECHILD; 273 | } 274 | if(child->state != STATE_ZOMBIE) { 275 | child = 0; 276 | } 277 | } 278 | 279 | /* found ? */ 280 | if(child) { 281 | int res; 282 | struct process *prev, dummy; 283 | 284 | if(status) { 285 | *status = child->exit_status; 286 | } 287 | res = child->id; 288 | 289 | /* remove it from our children list */ 290 | dummy.sib_next = p->child_list; 291 | prev = &dummy; 292 | while(prev->next) { 293 | if(prev->next == child) { 294 | prev->next = child->next; 295 | break; 296 | } 297 | } 298 | p->child_list = dummy.next; 299 | 300 | /* invalidate the id */ 301 | child->id = 0; 302 | return res; 303 | } 304 | 305 | /* not found, wait or sod off */ 306 | if(!(opt & WNOHANG)) { 307 | /* wait on our own process struct because 308 | * we have no way of knowing which child will 309 | * die first. 310 | * exit will wakeup the parent structure... 
311 | */ 312 | wait(p); 313 | /* done waiting, restart waitpid */ 314 | goto restart; 315 | } 316 | 317 | return 0; /* he's not dead jim */ 318 | } 319 | 320 | void context_switch(int pid) 321 | { 322 | static struct process *prev, *new; 323 | 324 | assert(get_intr_state() == 0); 325 | assert(pid > 0); 326 | assert(last_pid > 0); 327 | 328 | prev = proc + last_pid; 329 | new = proc + pid; 330 | 331 | if(last_pid != pid) { 332 | set_current_pid(new->id); 333 | 334 | /* switch to the new process' address space */ 335 | set_pgdir_addr(new->ctx.pgtbl_paddr); 336 | 337 | /* make sure we'll return to the correct kernel stack next time 338 | * we enter from userspace 339 | */ 340 | tss->esp0 = PAGE_TO_ADDR(new->kern_stack_pg) + KERN_STACK_SIZE; 341 | 342 | /* push all registers onto the stack before switching stacks */ 343 | push_regs(); 344 | 345 | /* XXX: when switching to newly forked processes this switch_stack call 346 | * WILL NOT RETURN HERE. It will return to just_forked instead. So the 347 | * rest of this function will not run. 348 | */ 349 | switch_stack(new->ctx.stack_ptr, &prev->ctx.stack_ptr); 350 | 351 | /* restore registers from the new stack */ 352 | pop_regs(); 353 | } else { 354 | set_current_pid(new->id); 355 | } 356 | } 357 | 358 | 359 | void set_current_pid(int pid) 360 | { 361 | cur_pid = pid; 362 | if(pid > 0) { 363 | last_pid = pid; 364 | } 365 | } 366 | 367 | int get_current_pid(void) 368 | { 369 | return cur_pid; 370 | } 371 | 372 | struct process *get_current_proc(void) 373 | { 374 | return cur_pid > 0 ? 
&proc[cur_pid] : 0; 375 | } 376 | /* get_process returns the process table slot for pid, or 0 (after logging) when pid is outside [0, MAX_PROC) or the slot's id does not match pid */ 377 | struct process *get_process(int pid) 378 | { 379 | struct process *p; 380 | if(pid < 0 || pid >= MAX_PROC || (p = proc + pid)->id != pid) { 381 | printf("get_process called with invalid pid: %d\n", pid); 382 | return 0; 383 | } 384 | return p; 385 | } 386 | 387 | int sys_getpid(void) 388 | { 389 | return cur_pid; 390 | } 391 | 392 | int sys_getppid(void) 393 | { 394 | struct process *p = get_current_proc(); 395 | 396 | if(!p) { 397 | return 0; 398 | } 399 | return p->parent; 400 | } 401 | -------------------------------------------------------------------------------- /src/proc.h: -------------------------------------------------------------------------------- 1 | #ifndef PROC_H_ 2 | #define PROC_H_ 3 | 4 | #include 5 | #include "asmops.h" 6 | #include "rbtree.h" 7 | #include "file.h" 8 | 9 | #define MAX_PROC 128 10 | #define MAX_FD 64 11 | 12 | struct context { 13 | /*struct registers regs;*/ /* saved general purpose registers */ 14 | /*uint32_t instr_ptr;*/ /* saved eip */ 15 | uint32_t stack_ptr; /* saved esp */ 16 | /*uint32_t flags;*/ /* saved eflags */ 17 | uint32_t pgtbl_paddr; /* physical address of the page table */ 18 | /* TODO add FPU state */ 19 | }; 20 | 21 | enum proc_state { 22 | STATE_RUNNABLE, 23 | STATE_BLOCKED, 24 | STATE_ZOMBIE 25 | }; 26 | 27 | 28 | struct process { 29 | int id, parent; 30 | enum proc_state state; 31 | 32 | int exit_status; 33 | 34 | /* when blocked it's waiting for a wakeup on this address */ 35 | void *wait_addr; 36 | 37 | int ticks_left; 38 | 39 | /* process vm map */ 40 | struct rbtree vmmap; 41 | 42 | /* extends of the process heap, increased by sbrk */ 43 | 44 | /* first page of the user stack, extends up to KMEM_START */ 45 | int user_stack_pg; 46 | /* first page of the kernel stack, (KERN_STACK_SIZE) */ 47 | int kern_stack_pg; 48 | 49 | struct context ctx; 50 | 51 | /* open files */ 52 | struct file files[MAX_FD]; 53 | 54 | unsigned int umask; 55 | 56 | struct process *child_list; 57 | 58 | struct process *next, 
*prev; /* for the scheduler queues */ 59 | struct process *sib_next; /* for the sibling list */ 60 | }; 61 | 62 | void init_proc(void); 63 | 64 | int sys_fork(void); 65 | int sys_exit(int status); 66 | int sys_waitpid(int pid, int *status, int opt); 67 | 68 | void context_switch(int pid); 69 | 70 | void set_current_pid(int pid); 71 | int get_current_pid(void); 72 | struct process *get_current_proc(void); 73 | struct process *get_process(int pid); 74 | 75 | int sys_getpid(void); 76 | int sys_getppid(void); 77 | 78 | /* defined in proc-asm.S */ 79 | uint32_t get_instr_ptr(void); 80 | uint32_t get_caller_instr_ptr(void); 81 | void get_instr_stack_ptr(uint32_t *iptr, uint32_t *sptr); 82 | 83 | #endif /* PROC_H_ */ 84 | -------------------------------------------------------------------------------- /src/rbtree.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "rbtree.h" 5 | #include "panic.h" 6 | 7 | #define INT2PTR(x) ((void*)(x)) 8 | #define PTR2INT(x) ((int)(x)) 9 | 10 | static int cmpaddr(void *ap, void *bp); 11 | static int cmpint(void *ap, void *bp); 12 | 13 | static int count_nodes(struct rbnode *node); 14 | static void del_tree(struct rbnode *node, void (*delfunc)(struct rbnode*, void*), void *cls); 15 | static struct rbnode *insert(struct rbtree *rb, struct rbnode *tree, void *key, void *data); 16 | static struct rbnode *delete(struct rbtree *rb, struct rbnode *tree, void *key); 17 | static void traverse(struct rbnode *node, void (*func)(struct rbnode*, void*), void *cls); 18 | 19 | struct rbtree *rb_create(rb_cmp_func_t cmp_func) 20 | { 21 | struct rbtree *rb; 22 | 23 | if(!(rb = malloc(sizeof *rb))) { 24 | return 0; 25 | } 26 | if(rb_init(rb, cmp_func) == -1) { 27 | free(rb); 28 | return 0; 29 | } 30 | return rb; 31 | } 32 | 33 | void rb_free(struct rbtree *rb) 34 | { 35 | rb_destroy(rb); 36 | free(rb); 37 | } 38 | 39 | 40 | int rb_init(struct rbtree *rb, rb_cmp_func_t 
cmp_func) 41 | { 42 | memset(rb, 0, sizeof *rb); 43 | /* RB_KEY_INT / RB_KEY_STRING select a built-in comparator, a null pointer (RB_KEY_ADDR) selects address order, anything else is used as the comparator itself */ 44 | if(cmp_func == RB_KEY_INT) { 45 | rb->cmp = cmpint; 46 | } else if(cmp_func == RB_KEY_STRING) { 47 | rb->cmp = (rb_cmp_func_t)strcmp; 48 | } else { 49 | rb->cmp = cmp_func ? cmp_func : cmpaddr; 50 | } 51 | 52 | rb->alloc = malloc; 53 | rb->free = free; 54 | return 0; 55 | } 56 | 57 | void rb_destroy(struct rbtree *rb) 58 | { 59 | del_tree(rb->root, rb->del, rb->del_cls); 60 | } 61 | 62 | void rb_clear(struct rbtree *rb) 63 | { 64 | del_tree(rb->root, rb->del, rb->del_cls); 65 | rb->root = 0; 66 | } 67 | 68 | int rb_copy(struct rbtree *dest, struct rbtree *src) 69 | { 70 | struct rbnode *node; 71 | 72 | rb_clear(dest); 73 | 74 | rb_begin(src); 75 | while((node = rb_next(src))) { 76 | if(rb_insert(dest, node->key, node->data) == -1) { 77 | return -1; 78 | } 79 | } 80 | return 0; 81 | } 82 | 83 | void rb_set_allocator(struct rbtree *rb, rb_alloc_func_t alloc, rb_free_func_t free) 84 | { 85 | rb->alloc = alloc; 86 | rb->free = free; 87 | } 88 | 89 | 90 | void rb_set_compare_func(struct rbtree *rb, rb_cmp_func_t func) 91 | { 92 | rb->cmp = func; 93 | } 94 | 95 | void rb_set_delete_func(struct rbtree *rb, rb_del_func_t func, void *cls) 96 | { 97 | rb->del = func; 98 | rb->del_cls = cls; 99 | } 100 | 101 | int rb_size(struct rbtree *rb) 102 | { 103 | return count_nodes(rb->root); 104 | } 105 | 106 | int rb_insert(struct rbtree *rb, void *key, void *data) 107 | { 108 | rb->root = insert(rb, rb->root, key, data); 109 | rb->root->red = 0; 110 | return 0; 111 | } 112 | 113 | int rb_inserti(struct rbtree *rb, int key, void *data) 114 | { 115 | rb->root = insert(rb, rb->root, INT2PTR(key), data); 116 | rb->root->red = 0; 117 | return 0; 118 | } 119 | 120 | /* rb_delete / rb_deletei: removing the last node leaves root null, so the root is recolored only when it still exists */ 121 | int rb_delete(struct rbtree *rb, void *key) 122 | { 123 | rb->root = delete(rb, rb->root, key); 124 | if(rb->root) rb->root->red = 0; 125 | return 0; 126 | } 127 | 128 | int rb_deletei(struct rbtree *rb, int key) 129 | { 130 | rb->root = delete(rb, rb->root, INT2PTR(key)); 131 | if(rb->root) rb->root->red = 
0; 132 | return 0; 133 | } 134 | 135 | 136 | void *rb_find(struct rbtree *rb, void *key) 137 | { 138 | struct rbnode *node = rb->root; 139 | 140 | while(node) { 141 | int cmp = rb->cmp(key, node->key); 142 | if(cmp == 0) { 143 | return node; 144 | } 145 | node = cmp < 0 ? node->left : node->right; 146 | } 147 | return 0; 148 | } 149 | 150 | void *rb_findi(struct rbtree *rb, int key) 151 | { 152 | return rb_find(rb, INT2PTR(key)); 153 | } 154 | 155 | 156 | void rb_foreach(struct rbtree *rb, void (*func)(struct rbnode*, void*), void *cls) 157 | { 158 | traverse(rb->root, func, cls); 159 | } 160 | 161 | 162 | struct rbnode *rb_root(struct rbtree *rb) 163 | { 164 | return rb->root; 165 | } 166 | 167 | void rb_begin(struct rbtree *rb) 168 | { 169 | rb->rstack = 0; 170 | rb->iter = rb->root; 171 | } 172 | 173 | #define push(sp, x) ((x)->next = (sp), (sp) = (x)) 174 | #define pop(sp) ((sp) = (sp)->next) 175 | #define top(sp) (sp) 176 | 177 | struct rbnode *rb_next(struct rbtree *rb) 178 | { 179 | struct rbnode *res = 0; 180 | 181 | while(rb->rstack || rb->iter) { 182 | if(rb->iter) { 183 | push(rb->rstack, rb->iter); 184 | rb->iter = rb->iter->left; 185 | } else { 186 | rb->iter = top(rb->rstack); 187 | pop(rb->rstack); 188 | res = rb->iter; 189 | rb->iter = rb->iter->right; 190 | break; 191 | } 192 | } 193 | return res; 194 | } 195 | 196 | void *rb_node_key(struct rbnode *node) 197 | { 198 | return node ? node->key : 0; 199 | } 200 | 201 | int rb_node_keyi(struct rbnode *node) 202 | { 203 | return node ? PTR2INT(node->key) : 0; 204 | } 205 | 206 | void *rb_node_data(struct rbnode *node) 207 | { 208 | return node ? node->data : 0; 209 | } 210 | 211 | static int cmpaddr(void *ap, void *bp) 212 | { 213 | return ap < bp ? -1 : (ap > bp ? 
1 : 0); 214 | } 215 | /* three-way integer compare; done without subtraction because PTR2INT(ap) - PTR2INT(bp) can overflow int for keys of opposite sign */ 216 | static int cmpint(void *ap, void *bp) 217 | { 218 | return (PTR2INT(ap) > PTR2INT(bp)) - (PTR2INT(ap) < PTR2INT(bp)); 219 | } 220 | 221 | 222 | /* ---- left-leaning 2-3 red-black implementation ---- */ 223 | 224 | /* helper prototypes */ 225 | static int is_red(struct rbnode *tree); 226 | static void color_flip(struct rbnode *tree); 227 | static struct rbnode *rot_left(struct rbnode *a); 228 | static struct rbnode *rot_right(struct rbnode *a); 229 | static struct rbnode *find_min(struct rbnode *tree); 230 | static struct rbnode *del_min(struct rbtree *rb, struct rbnode *tree); 231 | /*static struct rbnode *move_red_right(struct rbnode *tree);*/ 232 | static struct rbnode *move_red_left(struct rbnode *tree); 233 | static struct rbnode *fix_up(struct rbnode *tree); 234 | 235 | static int count_nodes(struct rbnode *node) 236 | { 237 | if(!node) 238 | return 0; 239 | 240 | return 1 + count_nodes(node->left) + count_nodes(node->right); 241 | } 242 | /* NOTE(review): nodes are released with free() here, while insert() obtains them from rb->alloc -- confirm this is intended when a custom allocator is set */ 243 | static void del_tree(struct rbnode *node, rb_del_func_t delfunc, void *cls) 244 | { 245 | if(!node) 246 | return; 247 | 248 | del_tree(node->left, delfunc, cls); 249 | del_tree(node->right, delfunc, cls); 250 | 251 | if(delfunc) { 252 | delfunc(node, cls); 253 | } 254 | free(node); 255 | } 256 | 257 | static struct rbnode *insert(struct rbtree *rb, struct rbnode *tree, void *key, void *data) 258 | { 259 | int cmp; 260 | 261 | if(!tree) { 262 | struct rbnode *node = rb->alloc(sizeof *node); 263 | if(!node) { 264 | panic("failed to allocate tree node\n"); 265 | } 266 | node->red = 1; 267 | node->key = key; 268 | node->data = data; 269 | node->left = node->right = 0; 270 | return node; 271 | } 272 | 273 | cmp = rb->cmp(key, tree->key); 274 | 275 | if(cmp < 0) { 276 | tree->left = insert(rb, tree->left, key, data); 277 | } else if(cmp > 0) { 278 | tree->right = insert(rb, tree->right, key, data); 279 | } else { 280 | tree->data = data; 281 | } 282 | 283 | /* fix right-leaning reds */ 284 | if(is_red(tree->right)) { 285 | tree 
= rot_left(tree); 286 | } 287 | /* fix two reds in a row */ 288 | if(is_red(tree->left) && is_red(tree->left->left)) { 289 | tree = rot_right(tree); 290 | } 291 | 292 | /* if 4-node, split it by color inversion */ 293 | if(is_red(tree->left) && is_red(tree->right)) { 294 | color_flip(tree); 295 | } 296 | 297 | return tree; 298 | } 299 | /* delete removes key from the subtree rooted at tree and returns the new subtree root (0 when the subtree becomes empty) */ 300 | static struct rbnode *delete(struct rbtree *rb, struct rbnode *tree, void *key) 301 | { 302 | int cmp; 303 | 304 | if(!tree) { 305 | return 0; 306 | } 307 | 308 | cmp = rb->cmp(key, tree->key); 309 | 310 | if(cmp < 0) { 311 | if(!is_red(tree->left) && !is_red(tree->left->left)) { 312 | tree = move_red_left(tree); 313 | } 314 | tree->left = delete(rb, tree->left, key); 315 | } else { 316 | /* need reds on the right */ 317 | if(is_red(tree->left)) { 318 | tree = rot_right(tree); 319 | } 320 | 321 | /* found it at the bottom (XXX what certifies left is null?) */ 322 | if(cmp == 0 && !tree->right) { 323 | if(rb->del) { 324 | rb->del(tree, rb->del_cls); 325 | } 326 | rb->free(tree); 327 | return 0; 328 | } 329 | /* NOTE(review): the reference LLRB delete calls move_red_right at this point; under this condition move_red_left reduces to a plain color_flip -- confirm intent */ 330 | if(!is_red(tree->right) && !is_red(tree->right->left)) { 331 | tree = move_red_left(tree); 332 | } 333 | /* compare through rb->cmp: pointer equality is wrong for string or custom comparators */ 334 | if(rb->cmp(key, tree->key) == 0) { 335 | struct rbnode *rmin = find_min(tree->right); 336 | if(rb->del) rb->del(tree, rb->del_cls); /* release the payload being removed before the successor's key/data overwrite it */ 337 | tree->key = rmin->key; tree->data = rmin->data; 338 | tree->right = del_min(rb, tree->right); 339 | } else { 340 | tree->right = delete(rb, tree->right, key); 341 | } 342 | } 343 | 344 | return fix_up(tree); 345 | } 346 | 347 | /*static struct rbnode *find(struct rbtree *rb, struct rbnode *node, void *key) 348 | { 349 | int cmp; 350 | 351 | if(!node) 352 | return 0; 353 | 354 | if((cmp = rb->cmp(key, node->key)) == 0) { 355 | return node; 356 | } 357 | return find(rb, cmp < 0 ? 
node->left : node->right, key); 358 | }*/ 359 | 360 | static void traverse(struct rbnode *node, void (*func)(struct rbnode*, void*), void *cls) 361 | { 362 | if(!node) 363 | return; 364 | 365 | traverse(node->left, func, cls); 366 | func(node, cls); 367 | traverse(node->right, func, cls); 368 | } 369 | 370 | /* helpers */ 371 | 372 | static int is_red(struct rbnode *tree) 373 | { 374 | return tree && tree->red; 375 | } 376 | 377 | static void color_flip(struct rbnode *tree) 378 | { 379 | tree->red = !tree->red; 380 | tree->left->red = !tree->left->red; 381 | tree->right->red = !tree->right->red; 382 | } 383 | 384 | static struct rbnode *rot_left(struct rbnode *a) 385 | { 386 | struct rbnode *b = a->right; 387 | a->right = b->left; 388 | b->left = a; 389 | b->red = a->red; 390 | a->red = 1; 391 | return b; 392 | } 393 | 394 | static struct rbnode *rot_right(struct rbnode *a) 395 | { 396 | struct rbnode *b = a->left; 397 | a->left = b->right; 398 | b->right = a; 399 | b->red = a->red; 400 | a->red = 1; 401 | return b; 402 | } 403 | /* find_min returns the leftmost node of the subtree, or 0 for an empty subtree */ 404 | static struct rbnode *find_min(struct rbnode *tree) 405 | { 406 | struct rbnode *node = tree; 407 | 408 | if(!tree) 409 | return 0; 410 | 411 | while(node->left) { 412 | node = node->left; 413 | } 414 | return node; 415 | } 416 | /* del_min unlinks and frees the leftmost node of the subtree and returns the new subtree root */ 417 | static struct rbnode *del_min(struct rbtree *rb, struct rbnode *tree) 418 | { 419 | if(!tree->left) { 420 | /* this is the minimum. delete() has already copied its key/data into 421 | * the node that stays in the tree, so the delete callback must not 422 | * run on it; only the node itself is released. */ 423 | rb->free(tree); 424 | return 0; 425 | } 426 | 427 | /* make sure we've got red (3/4-nodes) at the left side so we can delete at the bottom */ 428 | if(!is_red(tree->left) && !is_red(tree->left->left)) { 429 | tree = move_red_left(tree); 430 | } 431 | tree->left = del_min(rb, tree->left); 432 | 433 | /* fix right-reds, red-reds, and split 4-nodes on the way up */ 434 | return fix_up(tree); 435 | } 436 | 437 | #if 0 438 | /* push a red link on this node to the right */ 439 | static struct rbnode *move_red_right(struct rbnode *tree) 
440 | { 441 | /* flipping it makes both children go red, so we have a red to the right */ 442 | color_flip(tree); 443 | 444 | /* if after the flip we've got a red-red situation to the left, fix it */ 445 | if(is_red(tree->left->left)) { 446 | tree = rot_right(tree); 447 | color_flip(tree); 448 | } 449 | return tree; 450 | } 451 | #endif 452 | 453 | /* push a red link on this node to the left */ 454 | static struct rbnode *move_red_left(struct rbnode *tree) 455 | { 456 | /* flipping it makes both children go red, so we have a red to the left */ 457 | color_flip(tree); 458 | 459 | /* if after the flip we've got a red-red on the right-left, fix it */ 460 | if(is_red(tree->right->left)) { 461 | tree->right = rot_right(tree->right); 462 | tree = rot_left(tree); 463 | color_flip(tree); 464 | } 465 | return tree; 466 | } 467 | 468 | static struct rbnode *fix_up(struct rbnode *tree) 469 | { 470 | /* fix right-leaning */ 471 | if(is_red(tree->right)) { 472 | tree = rot_left(tree); 473 | } 474 | /* change invalid red-red pairs into a proper 4-node */ 475 | if(is_red(tree->left) && is_red(tree->left->left)) { 476 | tree = rot_right(tree); 477 | } 478 | /* split 4-nodes */ 479 | if(is_red(tree->left) && is_red(tree->right)) { 480 | color_flip(tree); 481 | } 482 | return tree; 483 | } 484 | 485 | void rb_dbg_print_tree(struct rbtree *tree) 486 | { 487 | struct rbnode *node; 488 | 489 | rb_begin(tree); 490 | while((node = rb_next(tree))) { 491 | printf("%d ", rb_node_keyi(node)); 492 | } 493 | printf("\n"); 494 | } 495 | -------------------------------------------------------------------------------- /src/rbtree.h: -------------------------------------------------------------------------------- 1 | #ifndef RBTREE_H_ 2 | #define RBTREE_H_ 3 | 4 | #include /* for size_t */ 5 | 6 | struct rbtree; 7 | struct rbnode; 8 | 9 | 10 | typedef void *(*rb_alloc_func_t)(size_t); 11 | typedef void (*rb_free_func_t)(void*); 12 | 13 | typedef int (*rb_cmp_func_t)(void*, void*); 14 | typedef 
void (*rb_del_func_t)(struct rbnode*, void*); 15 | 16 | 17 | struct rbtree { 18 | struct rbnode *root; 19 | 20 | rb_alloc_func_t alloc; 21 | rb_free_func_t free; 22 | 23 | rb_cmp_func_t cmp; 24 | rb_del_func_t del; 25 | void *del_cls; 26 | 27 | struct rbnode *rstack, *iter; 28 | }; 29 | 30 | 31 | struct rbnode { 32 | void *key, *data; 33 | int red; 34 | struct rbnode *left, *right; 35 | struct rbnode *next; /* for iterator stack */ 36 | }; 37 | 38 | #define RB_KEY_ADDR (rb_cmp_func_t)(0) 39 | #define RB_KEY_INT (rb_cmp_func_t)(1) 40 | #define RB_KEY_STRING (rb_cmp_func_t)(3) 41 | 42 | 43 | #ifdef __cplusplus 44 | extern "C" { 45 | #endif 46 | 47 | struct rbtree *rb_create(rb_cmp_func_t cmp_func); 48 | void rb_free(struct rbtree *rb); 49 | 50 | int rb_init(struct rbtree *rb, rb_cmp_func_t cmp_func); 51 | void rb_destroy(struct rbtree *rb); 52 | 53 | void rb_clear(struct rbtree *tree); 54 | int rb_copy(struct rbtree *dest, struct rbtree *src); 55 | 56 | void rb_set_allocator(struct rbtree *rb, rb_alloc_func_t alloc, rb_free_func_t free); 57 | void rb_set_compare_func(struct rbtree *rb, rb_cmp_func_t func); 58 | void rb_set_delete_func(struct rbtree *rb, rb_del_func_t func, void *cls); 59 | 60 | int rb_size(struct rbtree *rb); 61 | 62 | int rb_insert(struct rbtree *rb, void *key, void *data); 63 | int rb_inserti(struct rbtree *rb, int key, void *data); 64 | 65 | int rb_delete(struct rbtree *rb, void *key); 66 | int rb_deletei(struct rbtree *rb, int key); 67 | 68 | void *rb_find(struct rbtree *rb, void *key); 69 | void *rb_findi(struct rbtree *rb, int key); 70 | 71 | void rb_foreach(struct rbtree *rb, void (*func)(struct rbnode*, void*), void *cls); 72 | 73 | struct rbnode *rb_root(struct rbtree *rb); 74 | 75 | void rb_begin(struct rbtree *rb); 76 | struct rbnode *rb_next(struct rbtree *rb); 77 | 78 | void *rb_node_key(struct rbnode *node); 79 | int rb_node_keyi(struct rbnode *node); 80 | void *rb_node_data(struct rbnode *node); 81 | 82 | 83 | void 
rb_dbg_print_tree(struct rbtree *tree); 84 | 85 | #ifdef __cplusplus 86 | } 87 | #endif 88 | 89 | 90 | #endif /* RBTREE_H_ */ 91 | -------------------------------------------------------------------------------- /src/regs.S: -------------------------------------------------------------------------------- 1 | .text 2 | .align 4 3 | 4 | .globl get_regs 5 | get_regs: 6 | pushl %ebp 7 | movl %esp, %ebp 8 | 9 | pushl %edx 10 | movl 8(%ebp), %edx 11 | 12 | movl %eax, (%edx) 13 | movl %ebx, 4(%edx) 14 | movl %ecx, 8(%edx) 15 | 16 | /* juggle edx */ 17 | movl %edx, %eax 18 | popl %edx 19 | movl %edx, 12(%eax) 20 | pushl %edx 21 | movl %eax, %edx 22 | 23 | /* those two are pointless in a function */ 24 | movl %esp, 16(%edx) 25 | movl %ebp, 20(%edx) 26 | 27 | movl %esi, 24(%edx) 28 | movl %edi, 28(%edx) 29 | 30 | pushf 31 | popl %eax 32 | movl %eax, 32(%edx) 33 | 34 | movw %cs, 36(%edx) 35 | movw %ss, 40(%edx) 36 | movw %ds, 44(%edx) 37 | movw %es, 48(%edx) 38 | movw %fs, 52(%edx) 39 | movw %gs, 56(%edx) 40 | 41 | pushl %ebx 42 | movl %cr0, %ebx 43 | movl %ebx, 60(%edx) 44 | /*movl %cr1, %ebx 45 | movl %ebx, 64(%edx)*/ 46 | movl %cr2, %ebx 47 | movl %ebx, 68(%edx) 48 | movl %cr3, %ebx 49 | movl %ebx, 72(%edx) 50 | popl %ebx 51 | 52 | popl %edx 53 | popl %ebp 54 | ret 55 | -------------------------------------------------------------------------------- /src/rtc.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "rtc.h" 5 | 6 | /* CMOS I/O ports */ 7 | #define PORT_CTL 0x70 8 | #define PORT_DATA 0x71 9 | 10 | /* CMOS RTC registers */ 11 | #define REG_SEC 0 12 | #define REG_ALARM_SEC 1 13 | #define REG_MIN 2 14 | #define REG_ALARM_MIN 3 15 | #define REG_HOUR 4 16 | #define REG_ALARM_HOUR 5 17 | #define REG_WEEKDAY 6 18 | #define REG_DAY 7 19 | #define REG_MONTH 8 20 | #define REG_YEAR 9 21 | #define REG_STATA 10 22 | #define REG_STATB 11 23 | #define REG_STATC 12 24 | #define REG_STATD 13 25 | 
26 | #define STATA_BUSY (1 << 7) 27 | #define STATB_24HR (1 << 1) 28 | #define STATB_BIN (1 << 2) 29 | 30 | #define HOUR_PM_BIT (1 << 7) 31 | 32 | #define BCD_TO_BIN(x) ((((x) >> 4) & 0xf) * 10 + ((x) & 0xf)) 33 | 34 | static void read_rtc(struct tm *tm); 35 | static int read_reg(int reg); 36 | 37 | 38 | void init_rtc(void) 39 | { 40 | struct tm tm; 41 | 42 | read_rtc(&tm); 43 | start_time = mktime(&tm); 44 | 45 | printf("System real-time clock: %s", asctime(&tm)); 46 | } 47 | 48 | 49 | static void read_rtc(struct tm *tm) 50 | { 51 | int statb, pm; 52 | 53 | /* wait for any clock updates to finish */ 54 | while(read_reg(REG_STATA) & STATA_BUSY); 55 | 56 | tm->tm_sec = read_reg(REG_SEC); 57 | tm->tm_min = read_reg(REG_MIN); 58 | tm->tm_hour = read_reg(REG_HOUR); 59 | tm->tm_mday = read_reg(REG_DAY); 60 | tm->tm_mon = read_reg(REG_MONTH); 61 | tm->tm_year = read_reg(REG_YEAR); 62 | 63 | /* in 12hour mode, bit 7 means post-meridiem */ 64 | pm = tm->tm_hour & HOUR_PM_BIT; 65 | tm->tm_hour &= ~HOUR_PM_BIT; 66 | 67 | /* convert to binary if needed */ 68 | statb = read_reg(REG_STATB); 69 | if(!(statb & STATB_BIN)) { 70 | tm->tm_sec = BCD_TO_BIN(tm->tm_sec); 71 | tm->tm_min = BCD_TO_BIN(tm->tm_min); 72 | tm->tm_hour = BCD_TO_BIN(tm->tm_hour); 73 | tm->tm_mday = BCD_TO_BIN(tm->tm_mday); 74 | tm->tm_mon = BCD_TO_BIN(tm->tm_mon); 75 | tm->tm_year = BCD_TO_BIN(tm->tm_year); 76 | } 77 | 78 | /* make the year an offset from 1900 */ 79 | if(tm->tm_year < 100) { 80 | tm->tm_year += 100; 81 | } else { 82 | tm->tm_year -= 1900; 83 | } 84 | 85 | /* if tm_hour is in 12h mode, convert to 24h */ 86 | if(!(statb & STATB_24HR)) { 87 | if(tm->tm_hour == 12) { 88 | tm->tm_hour = 0; 89 | } 90 | if(pm) { 91 | tm->tm_hour += 12; 92 | } 93 | } 94 | 95 | tm->tm_mon -= 1; /* we want months to start from 0 */ 96 | } 97 | 98 | static int read_reg(int reg) 99 | { 100 | unsigned char val; 101 | outb(reg, PORT_CTL); 102 | iodelay(); 103 | inb(val, PORT_DATA); 104 | iodelay(); 105 | return val; 106 | } 
107 | -------------------------------------------------------------------------------- /src/rtc.h: -------------------------------------------------------------------------------- 1 | #ifndef _RTC_H_ 2 | #define _RTC_H_ 3 | 4 | #include 5 | 6 | /* the time read from rtc during init */ 7 | time_t start_time; 8 | 9 | void init_rtc(void); 10 | 11 | #endif /* _RTC_H_ */ 12 | -------------------------------------------------------------------------------- /src/sched.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "sched.h" 4 | #include "proc.h" 5 | #include "intr.h" 6 | #include "asmops.h" 7 | #include "config.h" 8 | 9 | #define EMPTY(q) ((q)->head == 0) 10 | 11 | struct proc_list { 12 | struct process *head, *tail; 13 | }; 14 | 15 | static void idle_proc(void); 16 | static void ins_back(struct proc_list *list, struct process *proc); 17 | static void ins_front(struct proc_list *list, struct process *proc); 18 | static void remove(struct proc_list *list, struct process *proc); 19 | static int hash_addr(void *addr); 20 | 21 | static struct proc_list runq; 22 | static struct proc_list zombieq; 23 | 24 | #define HTBL_SIZE 101 25 | static struct proc_list wait_htable[HTBL_SIZE]; 26 | 27 | 28 | void schedule(void) 29 | { 30 | disable_intr(); 31 | 32 | if(EMPTY(&runq)) { 33 | if(!get_current_proc()) { 34 | /* we're already in the idle process, don't reenter it 35 | * or you'll fill up the stack very quickly. 36 | */ 37 | return; 38 | } 39 | 40 | idle_proc(); 41 | return; 42 | } 43 | 44 | /* if the current process exhausted its timeslice, 45 | * move it to the back of the queue. 
46 | */ 47 | if(runq.head->ticks_left <= 0) { 48 | if(runq.head->next) { 49 | struct process *proc = runq.head; 50 | remove(&runq, proc); 51 | ins_back(&runq, proc); 52 | } 53 | 54 | /* start a new timeslice */ 55 | runq.head->ticks_left = TIMESLICE_TICKS; 56 | } 57 | 58 | /* always enter context_switch with interrupts disabled */ 59 | context_switch(runq.head->id); 60 | } 61 | 62 | void add_proc(int pid) 63 | { 64 | int istate; 65 | struct process *proc; 66 | 67 | istate = get_intr_state(); 68 | disable_intr(); 69 | 70 | proc = get_process(pid); 71 | 72 | ins_back(&runq, proc); 73 | proc->state = STATE_RUNNABLE; 74 | 75 | set_intr_state(istate); 76 | } 77 | 78 | void remove_proc(int pid) 79 | { 80 | int istate; 81 | struct process *proc; 82 | 83 | istate = get_intr_state(); 84 | disable_intr(); 85 | 86 | proc = get_process(pid); 87 | remove(&runq, proc); 88 | 89 | set_intr_state(istate); 90 | } 91 | 92 | /* block the process until we get a wakeup call for address ev */ 93 | void wait(void *wait_addr) 94 | { 95 | struct process *p; 96 | int hash_idx; 97 | 98 | disable_intr(); 99 | 100 | p = get_current_proc(); 101 | assert(p); 102 | 103 | /* remove it from the runqueue ... 
*/ 104 | remove(&runq, p); 105 | 106 | /* and place it in the wait hash table based on sleep_addr */ 107 | hash_idx = hash_addr(wait_addr); 108 | ins_back(wait_htable + hash_idx, p); 109 | 110 | p->state = STATE_BLOCKED; 111 | p->wait_addr = wait_addr; 112 | 113 | /* call the scheduler to give time to another process */ 114 | schedule(); 115 | } 116 | 117 | /* wake up all the processes sleeping on this address */ 118 | void wakeup(void *wait_addr) 119 | { 120 | int hash_idx; 121 | struct process *iter; 122 | struct proc_list *list; 123 | 124 | hash_idx = hash_addr(wait_addr); 125 | list = wait_htable + hash_idx; 126 | 127 | iter = list->head; 128 | while(iter) { 129 | if(iter->wait_addr == wait_addr) { 130 | /* found one, remove it, and make it runnable */ 131 | struct process *p = iter; 132 | iter = iter->next; 133 | 134 | remove(list, p); 135 | p->state = STATE_RUNNABLE; 136 | ins_back(&runq, p); 137 | } else { 138 | iter = iter->next; 139 | } 140 | } 141 | } 142 | 143 | static void idle_proc(void) 144 | { 145 | /* make sure we send any pending EOIs if needed. 146 | * end_of_irq will actually check if it's needed first. 
147 | */ 148 | struct intr_frame *ifrm = get_intr_frame(); 149 | end_of_irq(INTR_TO_IRQ(ifrm->inum)); 150 | 151 | set_current_pid(0); 152 | 153 | printf("idle loop is running\n"); 154 | 155 | /* make sure interrupts are enabled before halting */ 156 | while(EMPTY(&runq)) { 157 | enable_intr(); 158 | halt_cpu(); 159 | disable_intr(); 160 | } 161 | } 162 | 163 | 164 | /* list operations */ 165 | static void ins_back(struct proc_list *list, struct process *proc) 166 | { 167 | if(EMPTY(list)) { 168 | list->head = proc; 169 | } else { 170 | list->tail->next = proc; 171 | } 172 | 173 | proc->next = 0; 174 | proc->prev = list->tail; 175 | list->tail = proc; 176 | } 177 | 178 | static void ins_front(struct proc_list *list, struct process *proc) 179 | { 180 | if(EMPTY(list)) { 181 | list->tail = proc; 182 | } else { 183 | list->head->prev = proc; 184 | } 185 | 186 | proc->next = list->head; 187 | proc->prev = 0; 188 | list->head = proc; 189 | } 190 | 191 | static void remove(struct proc_list *list, struct process *proc) 192 | { 193 | if(proc->prev) { 194 | proc->prev->next = proc->next; 195 | } 196 | if(proc->next) { 197 | proc->next->prev = proc->prev; 198 | } 199 | if(list->head == proc) { 200 | list->head = proc->next; 201 | } 202 | if(list->tail == proc) { 203 | list->tail = proc->prev; 204 | } 205 | } 206 | 207 | static int hash_addr(void *addr) 208 | { 209 | return (uint32_t)addr % HTBL_SIZE; 210 | } 211 | -------------------------------------------------------------------------------- /src/sched.h: -------------------------------------------------------------------------------- 1 | #ifndef SCHED_H_ 2 | #define SCHED_H_ 3 | 4 | #include "proc.h" 5 | 6 | void schedule(void); 7 | 8 | void add_proc(int pid); 9 | void remove_proc(int pid); 10 | 11 | void wait(void *wait_addr); 12 | void wakeup(void *wait_addr); 13 | 14 | #endif /* SCHED_H_ */ 15 | -------------------------------------------------------------------------------- /src/segm-asm.S: 
-------------------------------------------------------------------------------- 1 | .data 2 | .align 4 3 | /* memory reserved for setup_selectors */ 4 | off:.long 0 5 | seg:.short 0 6 | /* memory reserved for set_gdt */ 7 | lim:.short 0 8 | addr:.long 0 9 | 10 | .text 11 | /* setup_selectors(uint16_t code, uint16_t data) 12 | * loads the requested selectors to all the selector registers */ 13 | .globl setup_selectors 14 | setup_selectors: 15 | /* set data selectors directly */ 16 | movl 8(%esp), %eax 17 | movw %ax, %ss 18 | movw %ax, %es 19 | movw %ax, %ds 20 | movw %ax, %gs 21 | movw %ax, %fs 22 | /* set cs using a long jump */ 23 | movl 4(%esp), %eax 24 | movw %ax, (seg) 25 | movl $ldcs, (off) 26 | ljmp *off 27 | ldcs: 28 | ret 29 | 30 | /* set_gdt(uint32_t addr, uint16_t limit) 31 | * loads the GDTR with the new address and limit for the GDT */ 32 | .globl set_gdt 33 | set_gdt: 34 | movl 4(%esp), %eax 35 | movl %eax, (addr) 36 | movw 8(%esp), %ax 37 | movw %ax, (lim) 38 | lgdt (lim) 39 | ret 40 | 41 | /* set_task_reg(uint16_t tss_selector) 42 | * loads the TSS selector in the task register */ 43 | .globl set_task_reg 44 | set_task_reg: 45 | mov 4(%esp), %eax 46 | ltr 4(%esp) 47 | ret 48 | -------------------------------------------------------------------------------- /src/segm.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "segm.h" 3 | #include "desc.h" 4 | #include "tss.h" 5 | 6 | /* bits for the 3rd 16bt part of the descriptor */ 7 | #define BIT_ACCESSED (1 << 8) 8 | #define BIT_WR (1 << 9) 9 | #define BIT_RD (1 << 9) 10 | #define BIT_EXP_DOWN (1 << 10) 11 | #define BIT_CONFORMING (1 << 10) 12 | #define BIT_CODE (1 << 11) 13 | #define BIT_NOSYS (1 << 12) 14 | #define BIT_PRESENT (1 << 15) 15 | /* TSS busy bit */ 16 | #define BIT_BUSY (1 << 9) 17 | 18 | /* bits for the last 16bit part of the descriptor */ 19 | #define BIT_BIG (1 << 6) 20 | #define BIT_DEFAULT (1 << 6) 21 | #define BIT_GRAN (1 << 
7) 22 | 23 | enum {TYPE_DATA, TYPE_CODE}; 24 | 25 | /* we need the following bit pattern at the 8th bit excluding the busy bit: 1001 */ 26 | #define TSS_TYPE_BITS (9 << 8) 27 | 28 | static void segm_desc(desc_t *desc, uint32_t base, uint32_t limit, int dpl, int type); 29 | static void task_desc(desc_t *desc, uint32_t base, uint32_t limit, int dpl); 30 | 31 | /* these functions are implemented in segm-asm.S */ 32 | void setup_selectors(uint16_t code, uint16_t data); 33 | void set_gdt(uint32_t addr, uint16_t limit); 34 | void set_task_reg(uint16_t tss_selector); 35 | 36 | 37 | /* our global descriptor table */ 38 | static desc_t gdt[6]; 39 | 40 | 41 | void init_segm(void) 42 | { 43 | memset(gdt, 0, sizeof gdt); 44 | segm_desc(gdt + SEGM_KCODE, 0, 0xffffffff, 0, TYPE_CODE); 45 | segm_desc(gdt + SEGM_KDATA, 0, 0xffffffff, 0, TYPE_DATA); 46 | segm_desc(gdt + SEGM_UCODE, 0, 0xffffffff, 3, TYPE_CODE); 47 | segm_desc(gdt + SEGM_UDATA, 0, 0xffffffff, 3, TYPE_DATA); 48 | 49 | set_gdt((uint32_t)gdt, sizeof gdt - 1); 50 | 51 | setup_selectors(selector(SEGM_KCODE, 0), selector(SEGM_KDATA, 0)); 52 | } 53 | 54 | /* constructs a GDT selector based on index and priviledge level */ 55 | uint16_t selector(int idx, int rpl) 56 | { 57 | return (idx << 3) | (rpl & 3); 58 | } 59 | 60 | void set_tss(uint32_t addr) 61 | { 62 | task_desc(gdt + SEGM_TASK, addr, sizeof(struct task_state) - 1, 3); 63 | set_task_reg(selector(SEGM_TASK, 0)); 64 | } 65 | 66 | static void segm_desc(desc_t *desc, uint32_t base, uint32_t limit, int dpl, int type) 67 | { 68 | desc->d[0] = limit & 0xffff; /* low order 16bits of limit */ 69 | desc->d[1] = base & 0xffff; /* low order 16bits of base */ 70 | 71 | /* third 16bit part contains the last 8 bits of base, the 2 priviledge 72 | * level bits starting on bit 13, present flag on bit 15, and type bits 73 | * starting from bit 8 74 | */ 75 | desc->d[2] = ((base >> 16) & 0xff) | ((dpl & 3) << 13) | BIT_PRESENT | 76 | BIT_NOSYS | (type == TYPE_DATA ? 
BIT_WR : (BIT_RD | BIT_CODE)); 77 | 78 | /* last 16bit part contains the last nibble of limit, the last byte of 79 | * base, and the granularity and deafult/big flags in bits 23 and 22 resp. 80 | */ 81 | desc->d[3] = ((limit >> 16) & 0xf) | ((base >> 16) & 0xff00) | BIT_GRAN | BIT_BIG; 82 | } 83 | 84 | static void task_desc(desc_t *desc, uint32_t base, uint32_t limit, int dpl) 85 | { 86 | desc->d[0] = limit & 0xffff; 87 | desc->d[1] = base & 0xffff; 88 | 89 | desc->d[2] = ((base >> 16) & 0xff) | ((dpl & 3) << 13) | BIT_PRESENT | 90 | TSS_TYPE_BITS; /* XXX busy ? */ 91 | desc->d[3] = ((limit >> 16) & 0xf) | ((base >> 16) & 0xff00) | BIT_GRAN; 92 | } 93 | /* 94 | static void dbg_print_gdt(void) 95 | { 96 | int i; 97 | 98 | printf("Global Descriptor Table\n"); 99 | printf("-----------------------\n"); 100 | 101 | for(i=0; i<6; i++) { 102 | print_desc(gdt + i); 103 | } 104 | } 105 | 106 | static void print_desc(desc_t *desc) 107 | { 108 | uint32_t base, limit; 109 | int dpl, g, db, l, avl, p, s, type; 110 | char *type_str; 111 | 112 | base = (uint32_t)desc->d[1] | ((uint32_t)(desc->d[2] & 0xff) << 16) | ((uint32_t)(desc->d[3] >> 8) << 24); 113 | limit = (uint32_t)desc->d[0] | ((uint32_t)(desc->d[3] & 0xf) << 16); 114 | dpl = (desc->d[2] >> 13) & 3; 115 | type = (desc->d[2] >> 8) & 0xf; 116 | g = (desc->d[3] >> 23) & 1; 117 | db = (desc->d[3] >> 22) & 1; 118 | l = (desc->d[3] >> 21) & 1; 119 | avl = (desc->d[3] >> 20) & 1; 120 | 121 | p = (desc->d[2] >> 15) & 1; 122 | s = (desc->d[2] >> 12) & 1; 123 | } 124 | */ 125 | -------------------------------------------------------------------------------- /src/segm.h: -------------------------------------------------------------------------------- 1 | #ifndef SEGM_H_ 2 | #define SEGM_H_ 3 | 4 | #define SEGM_KCODE 1 5 | #define SEGM_KDATA 2 6 | #define SEGM_UCODE 3 7 | #define SEGM_UDATA 4 8 | #define SEGM_TASK 5 9 | 10 | #ifndef ASM 11 | void init_segm(void); 12 | 13 | uint16_t selector(int idx, int rpl); 14 | 15 | void 
set_tss(uint32_t addr); 16 | #endif /* ASM */ 17 | 18 | 19 | #endif /* SEGM_H_ */ 20 | -------------------------------------------------------------------------------- /src/syscall.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "syscall.h" 3 | #include "intr.h" 4 | #include "proc.h" 5 | #include "sched.h" 6 | #include "timer.h" 7 | #include "fs.h" 8 | 9 | static int (*sys_func[NUM_SYSCALLS])(); 10 | 11 | static void syscall(int inum); 12 | 13 | static int sys_hello(void); 14 | 15 | void init_syscall(void) 16 | { 17 | sys_func[SYS_HELLO] = sys_hello; 18 | sys_func[SYS_SLEEP] = sys_sleep; /* timer.c */ 19 | sys_func[SYS_FORK] = sys_fork; /* proc.c */ 20 | sys_func[SYS_EXIT] = sys_exit; /* proc.c */ 21 | sys_func[SYS_WAITPID] = sys_waitpid; /* proc.c */ 22 | sys_func[SYS_GETPID] = sys_getpid; /* proc.c */ 23 | sys_func[SYS_GETPPID] = sys_getppid; /* proc.c */ 24 | 25 | #if 0 26 | sys_func[SYS_MOUNT] = sys_mount; /* fs.c */ 27 | sys_func[SYS_UMOUNT] = sys_umount; /* fs.c */ 28 | sys_func[SYS_OPEN] = sys_open; /* fs.c */ 29 | sys_func[SYS_CLOSE] = sys_close; /* fs.c */ 30 | sys_func[SYS_READ] = sys_read; /* fs.c */ 31 | sys_func[SYS_WRITE] = sys_write; /* fs.c */ 32 | sys_func[SYS_LSEEK] = sys_lseek; /* fs.c */ 33 | #endif 34 | 35 | interrupt(SYSCALL_INT, syscall); 36 | } 37 | 38 | static void syscall(int inum) 39 | { 40 | struct intr_frame *frm; 41 | int idx; 42 | 43 | frm = get_intr_frame(); 44 | idx = frm->regs.eax; 45 | 46 | if(idx < 0 || idx >= NUM_SYSCALLS) { 47 | printf("invalid syscall: %d\n", idx); 48 | return; 49 | } 50 | 51 | /* the return value goes into the interrupt frame copy of the user's eax 52 | * so that it'll be restored into eax before returning to userland. 
53 | */ 54 | frm->regs.eax = sys_func[idx](frm->regs.ebx, frm->regs.ecx, frm->regs.edx, frm->regs.esi, frm->regs.edi); 55 | 56 | /* we don't necessarily want to return to the same process 57 | * might have blocked or exited or whatever, so call schedule 58 | * to decide what's going to run next. 59 | */ 60 | schedule(); 61 | } 62 | 63 | static int sys_hello(void) 64 | { 65 | printf("process %d says hello!\n", get_current_pid()); 66 | return 0; 67 | } 68 | -------------------------------------------------------------------------------- /src/syscall.h: -------------------------------------------------------------------------------- 1 | #ifndef SYSCALL_H_ 2 | #define SYSCALL_H_ 3 | 4 | #define KDEF_SYSCALL_H 5 | #include "kdef.h" 6 | 7 | #ifndef ASM 8 | void init_syscall(void); 9 | #endif 10 | 11 | #endif /* SYSCALL_H_ */ 12 | -------------------------------------------------------------------------------- /src/term.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "term.h" 3 | #include "vid.h" 4 | #include "intr.h" 5 | 6 | static int bg, fg = LTGRAY; 7 | static int cursor_x, cursor_y; 8 | 9 | /* sets the active text color and returns previous value */ 10 | int set_text_color(int c) 11 | { 12 | int prev = fg; 13 | 14 | if(c >= 0 && c < 16) { 15 | fg = c; 16 | } 17 | return prev; 18 | } 19 | 20 | /* sets the active background color and returns the current value */ 21 | int set_back_color(int c) 22 | { 23 | int prev = bg; 24 | 25 | if(c >= 0 && c < 16) { 26 | bg = c; 27 | } 28 | return prev; 29 | } 30 | 31 | /* output a single character, handles formatting, cursor advancement 32 | * and scrolling the screen when we reach the bottom. 
33 | */ 34 | int putchar(int c) 35 | { 36 | int istate = get_intr_state(); 37 | disable_intr(); 38 | 39 | switch(c) { 40 | case '\n': 41 | cursor_y++; 42 | 43 | case '\r': 44 | cursor_x = 0; 45 | break; 46 | 47 | case '\b': 48 | cursor_x--; 49 | set_char(' ', cursor_x, cursor_y, fg, bg); 50 | break; 51 | 52 | case '\t': 53 | cursor_x = ((cursor_x >> 3) + 1) << 3; 54 | break; 55 | 56 | default: 57 | if(isprint(c)) { 58 | set_char(c, cursor_x, cursor_y, fg, bg); 59 | if(++cursor_x >= WIDTH) { 60 | cursor_x = 0; 61 | cursor_y++; 62 | } 63 | } 64 | } 65 | 66 | if(cursor_y >= HEIGHT) { 67 | scroll_scr(); 68 | cursor_y--; 69 | } 70 | 71 | set_cursor(cursor_x, cursor_y); 72 | 73 | set_intr_state(istate); 74 | return c; 75 | } 76 | -------------------------------------------------------------------------------- /src/term.h: -------------------------------------------------------------------------------- 1 | #ifndef TERM_H_ 2 | #define TERM_H_ 3 | 4 | int set_text_color(int c); 5 | int set_back_color(int c); 6 | 7 | int putchar(int c); 8 | 9 | #endif /* TERM_H_ */ 10 | -------------------------------------------------------------------------------- /src/test_proc.S: -------------------------------------------------------------------------------- 1 | #define ASM 2 | #include 3 | 4 | .text 5 | .globl test_proc 6 | test_proc: 7 | /* fork another process */ 8 | movl $SYS_FORK, %eax 9 | int $SYSCALL_INT 10 | 11 | /* test copy-on-write by pushing the pid to the stack 12 | * then use this value from the stack times 2 as a sleep 13 | * interval in the loop. 14 | */ 15 | movl $SYS_GETPID, %eax 16 | int $SYSCALL_INT 17 | push %eax 18 | 19 | /* this will count the iterations */ 20 | xor %ecx, %ecx 21 | 22 | infloop: 23 | /* --- print a message --- */ 24 | movl $SYS_HELLO, %eax 25 | int $SYSCALL_INT 26 | 27 | 28 | /* --- sleep for (pid * 2) seconds --- 29 | * grab the pid from the stack and shift it left to 30 | * multiply the pid by 2. 
Then use that as a sleep interval 31 | * in seconds. 32 | */ 33 | movl (%esp), %ebx 34 | shl $1, %ebx 35 | movl $SYS_SLEEP, %eax 36 | int $SYSCALL_INT 37 | 38 | inc %ecx 39 | 40 | /* let process 2 quit after 2 iterations */ 41 | cmpl $2, (%esp) 42 | jne 1f 43 | cmpl $2, %ecx 44 | je exit_proc 45 | 46 | 1: 47 | jmp infloop 48 | 49 | exit_proc: 50 | movl $SYS_EXIT, %eax 51 | movl $0, %ebx 52 | int $SYSCALL_INT 53 | 54 | /* shouldn't reach this, trap otherwise */ 55 | int $3 56 | 57 | .globl test_proc_end 58 | test_proc_end: 59 | -------------------------------------------------------------------------------- /src/timer.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "intr.h" 4 | #include "asmops.h" 5 | #include "timer.h" 6 | #include "proc.h" 7 | #include "sched.h" 8 | #include "config.h" 9 | 10 | /* frequency of the oscillator driving the 8254 timer */ 11 | #define OSC_FREQ_HZ 1193182 12 | 13 | /* macro to divide and round to the nearest integer */ 14 | #define DIV_ROUND(a, b) ((a) / (b) + ((a) % (b)) / ((b) / 2)) 15 | 16 | /* I/O ports connected to the 8254 */ 17 | #define PORT_DATA0 0x40 18 | #define PORT_DATA1 0x41 19 | #define PORT_DATA2 0x42 20 | #define PORT_CMD 0x43 21 | 22 | /* command bits */ 23 | #define CMD_CHAN0 0 24 | #define CMD_CHAN1 (1 << 6) 25 | #define CMD_CHAN2 (2 << 6) 26 | #define CMD_RDBACK (3 << 6) 27 | 28 | #define CMD_LATCH 0 29 | #define CMD_ACCESS_LOW (1 << 4) 30 | #define CMD_ACCESS_HIGH (2 << 4) 31 | #define CMD_ACCESS_BOTH (3 << 4) 32 | 33 | #define CMD_OP_INT_TERM 0 34 | #define CMD_OP_ONESHOT (1 << 1) 35 | #define CMD_OP_RATE (2 << 1) 36 | #define CMD_OP_SQWAVE (3 << 1) 37 | #define CMD_OP_SOFT_STROBE (4 << 1) 38 | #define CMD_OP_HW_STROBE (5 << 1) 39 | 40 | #define CMD_MODE_BIN 0 41 | #define CMD_MODE_BCD 1 42 | 43 | 44 | #define MSEC_TO_TICKS(ms) ((ms) * TICK_FREQ_HZ / 1000) 45 | 46 | struct timer_event { 47 | int dt; /* remaining ticks delta from the 
previous event */ 48 | struct timer_event *next; 49 | }; 50 | 51 | 52 | static void timer_handler(); 53 | 54 | 55 | static struct timer_event *evlist; 56 | 57 | 58 | void init_timer(void) 59 | { 60 | /* calculate the reload count: round(osc / freq) */ 61 | int reload_count = DIV_ROUND(OSC_FREQ_HZ, TICK_FREQ_HZ); 62 | 63 | /* set the mode to square wave for channel 0, both low 64 | * and high reload count bytes will follow... 65 | */ 66 | outb(CMD_CHAN0 | CMD_ACCESS_BOTH | CMD_OP_SQWAVE, PORT_CMD); 67 | 68 | /* write the low and high bytes of the reload count to the 69 | * port for channel 0 70 | */ 71 | outb(reload_count & 0xff, PORT_DATA0); 72 | outb((reload_count >> 8) & 0xff, PORT_DATA0); 73 | 74 | /* set the timer interrupt handler */ 75 | interrupt(IRQ_TO_INTR(0), timer_handler); 76 | } 77 | 78 | int sys_sleep(int sec) 79 | { 80 | printf("process %d will sleep for %d seconds\n", get_current_pid(), sec); 81 | sleep(sec * 1000); /* timer.c */ 82 | 83 | /* TODO if interrupted, return the remaining seconds */ 84 | return 0; 85 | } 86 | 87 | void sleep(unsigned long msec) 88 | { 89 | int ticks, tsum, istate; 90 | struct timer_event *ev, *node; 91 | 92 | if((ticks = MSEC_TO_TICKS(msec)) <= 0) { 93 | return; 94 | } 95 | 96 | if(!(ev = malloc(sizeof *ev))) { 97 | printf("sleep: failed to allocate timer_event structure\n"); 98 | return; 99 | } 100 | 101 | istate = get_intr_state(); 102 | disable_intr(); 103 | 104 | /* insert at the beginning */ 105 | if(!evlist || ticks <= evlist->dt) { 106 | ev->next = evlist; 107 | evlist = ev; 108 | 109 | ev->dt = ticks; 110 | if(ev->next) { 111 | ev->next->dt -= ticks; 112 | } 113 | } else { 114 | 115 | tsum = evlist->dt; 116 | node = evlist; 117 | 118 | while(node->next && ticks > tsum + node->next->dt) { 119 | tsum += node->next->dt; 120 | node = node->next; 121 | } 122 | 123 | ev->next = node->next; 124 | node->next = ev; 125 | 126 | /* fix the relative times */ 127 | ev->dt = ticks - tsum; 128 | if(ev->next) { 129 | 
ev->next->dt -= ev->dt; 130 | } 131 | } 132 | 133 | set_intr_state(istate); 134 | 135 | /* wait on the address of this timer event */ 136 | wait(ev); 137 | } 138 | 139 | /* This will be called by the interrupt dispatcher approximately 140 | * every 1/250th of a second, so it must be extremely fast. 141 | * For now, just increasing a tick counter will suffice. 142 | */ 143 | static void timer_handler(int inum) 144 | { 145 | int istate; 146 | struct process *p; 147 | 148 | nticks++; 149 | 150 | /*printf("TICKS: %d\n", nticks);*/ 151 | 152 | istate = get_intr_state(); 153 | disable_intr(); 154 | 155 | /* find out if there are any timers that have to go off */ 156 | if(evlist) { 157 | evlist->dt--; 158 | 159 | while(evlist && evlist->dt <= 0) { 160 | struct timer_event *ev = evlist; 161 | evlist = evlist->next; 162 | 163 | printf("timer going off!!!\n"); 164 | /* wake up all processes waiting on this address */ 165 | wakeup(ev); 166 | free(ev); 167 | } 168 | } 169 | 170 | /* decrement the process' ticks_left and call the scheduler to decide if 171 | * it's time to switch processes 172 | */ 173 | if((p = get_current_proc())) { 174 | p->ticks_left--; 175 | } 176 | schedule(); 177 | 178 | set_intr_state(istate); 179 | } 180 | -------------------------------------------------------------------------------- /src/timer.h: -------------------------------------------------------------------------------- 1 | #ifndef _TIMER_H_ 2 | #define _TIMER_H_ 3 | 4 | unsigned long nticks; 5 | 6 | void init_timer(void); 7 | 8 | int sys_sleep(int sec); 9 | void sleep(unsigned long msec); 10 | 11 | #endif /* _TIMER_H_ */ 12 | -------------------------------------------------------------------------------- /src/tss.h: -------------------------------------------------------------------------------- 1 | #ifndef TSS_H_ 2 | #define TSS_H_ 3 | 4 | #include 5 | 6 | struct task_state { 7 | uint32_t prev_task; 8 | uint32_t esp0, ss0; /* we only ever set these two values */ 9 | uint32_t esp1, ss1; 10 | 
uint32_t esp2, ss2; 11 | uint32_t cr3; 12 | uint32_t eip; 13 | uint32_t eflags; 14 | uint32_t eax, ecx, edx, ebx; 15 | uint32_t esp, ebp, esi, edi; 16 | uint32_t es, cs, ss, ds, fs, gs; 17 | uint32_t ldt_sel; 18 | uint16_t trap, iomap_addr; 19 | } __attribute__((packed)); 20 | 21 | #endif /* TSS_H_ */ 22 | -------------------------------------------------------------------------------- /src/vid.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "vid.h" 3 | #include "intr.h" 4 | #include "asmops.h" 5 | 6 | /* height of our virtual console text buffer */ 7 | #define VIRT_HEIGHT 200 8 | 9 | /* CRTC ports */ 10 | #define CRTC_ADDR 0x3d4 11 | #define CRTC_DATA 0x3d5 12 | 13 | /* CRTC registers */ 14 | #define CRTC_START_HIGH 0xc 15 | #define CRTC_START_LOW 0xd 16 | #define CRTC_CURSOR_HIGH 0xe 17 | #define CRTC_CURSOR_LOW 0xf 18 | 19 | /* construct a character with its attributes */ 20 | #define VMEM_CHAR(c, fg, bg) \ 21 | ((uint16_t)(c) | (((uint16_t)(fg) & 0xf) << 8) | (((uint16_t)(bg) & 0xf) << 12)) 22 | 23 | static void set_start_line(int line); 24 | 25 | static uint16_t *vmem = (uint16_t*)0xb8000; 26 | static int start_line; 27 | 28 | 29 | void clear_scr(void) 30 | { 31 | int istate = get_intr_state(); 32 | disable_intr(); 33 | 34 | memset16(vmem, VMEM_CHAR(' ', LTGRAY, BLACK), WIDTH * HEIGHT); 35 | start_line = 0; 36 | set_start_line(0); 37 | set_cursor(0, 0); 38 | 39 | set_intr_state(istate); 40 | } 41 | 42 | void set_char(char c, int x, int y, int fg, int bg) 43 | { 44 | vmem[(y + start_line) * WIDTH + x] = VMEM_CHAR(c, fg, bg); 45 | } 46 | 47 | void set_cursor(int x, int y) 48 | { 49 | int loc; 50 | int istate = get_intr_state(); 51 | disable_intr(); 52 | 53 | if(x < 0 || x >= WIDTH || y < 0 || y >= HEIGHT) { 54 | loc = 0xffff; 55 | } else { 56 | loc = (y + start_line) * WIDTH + x; 57 | } 58 | 59 | outb(CRTC_CURSOR_LOW, CRTC_ADDR); 60 | outb(loc, CRTC_DATA); 61 | outb(CRTC_CURSOR_HIGH, CRTC_ADDR); 62 | 
outb(loc >> 8, CRTC_DATA); 63 | 64 | set_intr_state(istate); 65 | } 66 | 67 | void scroll_scr(void) 68 | { 69 | int new_line, istate = get_intr_state(); 70 | disable_intr(); 71 | 72 | if(++start_line > VIRT_HEIGHT - HEIGHT) { 73 | /* The bottom of the visible range reached the end of our text buffer. 74 | * Copy the rest of the lines to the top and reset start_line. 75 | */ 76 | memcpy(vmem, vmem + start_line * WIDTH, (HEIGHT - 1) * WIDTH * 2); 77 | start_line = 0; 78 | } 79 | 80 | /* clear the next line that will be revealed by scrolling */ 81 | new_line = start_line + HEIGHT - 1; 82 | memset16(vmem + new_line * WIDTH, VMEM_CHAR(' ', LTGRAY, BLACK), WIDTH); 83 | set_start_line(start_line); 84 | 85 | set_intr_state(istate); 86 | } 87 | 88 | static void set_start_line(int line) 89 | { 90 | int start_addr = line * WIDTH; 91 | 92 | outb(CRTC_START_LOW, CRTC_ADDR); 93 | outb(start_addr & 0xff, CRTC_DATA); 94 | outb(CRTC_START_HIGH, CRTC_ADDR); 95 | outb((start_addr >> 8) & 0xff, CRTC_DATA); 96 | } 97 | -------------------------------------------------------------------------------- /src/vid.h: -------------------------------------------------------------------------------- 1 | #ifndef VID_H_ 2 | #define VID_H_ 3 | 4 | #define WIDTH 80 5 | #define HEIGHT 25 6 | 7 | /* the standard CGA color palette */ 8 | enum { 9 | BLACK, 10 | BLUE, 11 | GREEN, 12 | CYAN, 13 | RED, 14 | MAGENTA, 15 | BROWN, 16 | LTGRAY, 17 | GRAY, 18 | LTBLUE, 19 | LTGREEN, 20 | LTCYAN, 21 | LTRED, 22 | LTMAGENTA, 23 | YELLOW, 24 | WHITE 25 | }; 26 | 27 | void clear_scr(void); 28 | void set_char(char c, int x, int y, int fg, int bg); 29 | void set_cursor(int x, int y); 30 | void scroll_scr(void); 31 | 32 | #endif /* VID_H_ */ 33 | -------------------------------------------------------------------------------- /src/vm-asm.S: -------------------------------------------------------------------------------- 1 | .text 2 | /* enable_paging(void) 3 | * sets bit 31 of cr0 which enables page translation */ 4 | 
.globl enable_paging
enable_paging:
	movl %cr0, %eax
	orl $0x80000000, %eax	/* set bit 31, leave every other cr0 bit alone */
	movl %eax, %cr0
	ret

/* disable_paging(void)
 * clears bit 31 of cr0 which disables page translation */
.globl disable_paging
disable_paging:
	movl %cr0, %eax
	andl $0x7fffffff, %eax	/* clear bit 31, leave every other cr0 bit alone */
	movl %eax, %cr0
	ret

/* get_paging_status(void)
 * returns 0 if paging is disabled or 1 if it's enabled */
.globl get_paging_status
get_paging_status:
	movl %cr0, %eax
	shr $31, %eax		/* move bit 31 down to bit 0; result is left in eax */
	ret

/* set_pgdir_addr(uint32_t addr)
 * sets the address of the page directory by writing to cr3, which
 * also results in a TLB flush. */
.globl set_pgdir_addr
set_pgdir_addr:
	movl 4(%esp), %eax	/* first argument, read from the stack */
	movl %eax, %cr3
	ret

/* get_pgdir_addr(void)
 * returns the physical address of the page table directory (cr3) */
.globl get_pgdir_addr
get_pgdir_addr:
	movl %cr3, %eax
	ret

/* flush_tlb(void)
 * invalidates the whole TLB.
entries for pages marked as global
 * are unaffected */
.globl flush_tlb
flush_tlb:
	movl %cr3, %eax		/* rewriting cr3 with its own value does the flush */
	movl %eax, %cr3
	ret

/* flush_tlb_addr(uint32_t addr)
 * flushes the TLB entry for the page containing a particular
 * virtual address */
.globl flush_tlb_addr
flush_tlb_addr:
	movl 4(%esp), %eax	/* first argument, read from the stack */
	invlpg (%eax)
	ret

/* get_fault_addr(void)
 * returns the contents of control register 2, which provides
 * the faulting address during a page fault exception
 */
.globl get_fault_addr
get_fault_addr:
	movl %cr2, %eax
	ret

--------------------------------------------------------------------------------
/src/vm.c:
--------------------------------------------------------------------------------
/* NOTE(review): the operands of the next four #include lines are missing
 * from this copy of the file and must be restored from the original source.
 */
#include
#include
#include
#include
#include "config.h"
#include "vm.h"
#include "intr.h"
#include "mem.h"
#include "panic.h"
#include "proc.h"

/* start of the region that init_vm maps 1-1 (video memory and kernel code) */
#define IDMAP_START		0xa0000

/* The last page directory entry is made to point at the page directory
 * itself (see init_vm), so the directory is addressed at PGDIR_ADDR and the
 * page table of directory entry x at PGTBL(x), inside the top 4MB of the
 * address space (PGTBL_BASE evaluates to 0xffc00000).
 */
#define PGDIR_ADDR		0xfffff000
#define PGTBL_BASE		(0xffffffff - 4096 * 1024 + 1)
#define PGTBL(x)		((uint32_t*)(PGTBL_BASE + PGSIZE * (x)))

/* masks selecting the attribute bits of an entry: low 6 bits and low 9 bits */
#define ATTR_PGDIR_MASK	0x3f
#define ATTR_PGTBL_MASK	0x1ff

/* interrupt number passed to interrupt() when installing pgfault */
#define PAGEFAULT		14


/* A node of a free virtual page range list. start and end are page numbers;
 * the allocator treats the range as [start, end), i.e. end - start pages.
 */
struct page_range {
	int start, end;
	struct page_range *next;
};

/* defined in vm-asm.S */
void enable_paging(void);
void disable_paging(void);
int get_paging_status(void);
void set_pgdir_addr(uint32_t addr);
void flush_tlb(void);
void flush_tlb_addr(uint32_t addr);
/* same as flush_tlb_addr, but takes a page number instead of an address */
#define flush_tlb_page(p)	flush_tlb_addr(PAGE_TO_ADDR(p))
uint32_t get_fault_addr(void);

static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high);
static void pgfault(int inum);
static int copy_on_write(struct vm_page *page);
static struct page_range *alloc_node(void);
static void 
free_node(struct page_range *node); 44 | 45 | /* page directory */ 46 | static uint32_t *pgdir; 47 | 48 | /* 2 lists of free ranges, for kernel memory and user memory */ 49 | static struct page_range *pglist[2]; 50 | /* list of free page_range structures to be used in the lists */ 51 | static struct page_range *node_pool; 52 | /* the first page range for the whole kernel address space, to get things started */ 53 | static struct page_range first_node; 54 | 55 | 56 | void init_vm(void) 57 | { 58 | uint32_t idmap_end; 59 | int i, kmem_start_pg, pgtbl_base_pg; 60 | 61 | /* setup the page tables */ 62 | pgdir = (uint32_t*)alloc_phys_page(); 63 | memset(pgdir, 0, PGSIZE); 64 | set_pgdir_addr((uint32_t)pgdir); 65 | 66 | /* map the video memory and kernel code 1-1 */ 67 | get_kernel_mem_range(0, &idmap_end); 68 | map_mem_range(IDMAP_START, idmap_end - IDMAP_START, IDMAP_START, 0); 69 | 70 | /* make the last page directory entry point to the page directory */ 71 | pgdir[1023] = ((uint32_t)pgdir & PGENT_ADDR_MASK) | PG_PRESENT; 72 | pgdir = (uint32_t*)PGDIR_ADDR; 73 | 74 | /* set the page fault handler */ 75 | interrupt(PAGEFAULT, pgfault); 76 | 77 | /* we can enable paging now */ 78 | enable_paging(); 79 | 80 | /* initialize the virtual page allocator */ 81 | node_pool = 0; 82 | 83 | kmem_start_pg = ADDR_TO_PAGE(KMEM_START); 84 | pgtbl_base_pg = ADDR_TO_PAGE(PGTBL_BASE); 85 | 86 | first_node.start = kmem_start_pg; 87 | first_node.end = pgtbl_base_pg; 88 | first_node.next = 0; 89 | pglist[MEM_KERNEL] = &first_node; 90 | 91 | pglist[MEM_USER] = alloc_node(); 92 | pglist[MEM_USER]->start = ADDR_TO_PAGE(idmap_end); 93 | pglist[MEM_USER]->end = kmem_start_pg; 94 | pglist[MEM_USER]->next = 0; 95 | 96 | /* temporarily map something into every 1024th page of the kernel address 97 | * space to force pre-allocation of all the kernel page-tables 98 | */ 99 | for(i=kmem_start_pg; ivpage = vpage; 169 | page->ppage = ppage; 170 | page->flags = (attr & ATTR_PGTBL_MASK) | PG_PRESENT; 171 
| page->nref = 1; 172 | 173 | rb_inserti(&p->vmmap, vpage, page); 174 | } else { 175 | /* otherwise just update the mapping */ 176 | page->ppage = ppage; 177 | 178 | /* XXX don't touch the flags, as that's how we implement CoW 179 | * by changing the mapping without affecting the vm_page 180 | */ 181 | } 182 | } 183 | 184 | set_intr_state(intr_state); 185 | return 0; 186 | } 187 | 188 | int unmap_page(int vpage) 189 | { 190 | uint32_t *pgtbl; 191 | int res = 0; 192 | int diridx = PAGE_TO_PGTBL(vpage); 193 | int pgidx = PAGE_TO_PGTBL_PG(vpage); 194 | 195 | int intr_state = get_intr_state(); 196 | disable_intr(); 197 | 198 | if(!(pgdir[diridx] & PG_PRESENT)) { 199 | goto err; 200 | } 201 | pgtbl = PGTBL(diridx); 202 | 203 | if(!(pgtbl[pgidx] & PG_PRESENT)) { 204 | goto err; 205 | } 206 | pgtbl[pgidx] = 0; 207 | flush_tlb_page(vpage); 208 | 209 | if(0) { 210 | err: 211 | printf("unmap_page(%d): page already not mapped\n", vpage); 212 | res = -1; 213 | } 214 | set_intr_state(intr_state); 215 | return res; 216 | } 217 | 218 | /* if ppg_start is -1, we allocate physical pages to map with alloc_phys_page() */ 219 | int map_page_range(int vpg_start, int pgcount, int ppg_start, unsigned int attr) 220 | { 221 | int i, phys_pg; 222 | 223 | for(i=0; i 0 ? 
ADDR_TO_PAGE(paddr) : -1; 255 | num_pages = ADDR_TO_PAGE(sz) + 1; 256 | 257 | return map_page_range(vpg_start, num_pages, ppg_start, attr); 258 | } 259 | 260 | /* translate a virtual address to a physical address using the current page table */ 261 | uint32_t virt_to_phys(uint32_t vaddr) 262 | { 263 | int pg; 264 | uint32_t pgaddr; 265 | 266 | if((pg = virt_to_phys_page(ADDR_TO_PAGE(vaddr))) == -1) { 267 | return 0; 268 | } 269 | pgaddr = PAGE_TO_ADDR(pg); 270 | 271 | return pgaddr | ADDR_TO_PGOFFS(vaddr); 272 | } 273 | 274 | /* translate a virtual page number to a physical page number using the current page table */ 275 | int virt_to_phys_page(int vpg) 276 | { 277 | uint32_t pgaddr, *pgtbl; 278 | int diridx, pgidx; 279 | 280 | if(vpg < 0 || vpg >= PAGE_COUNT) { 281 | return -1; 282 | } 283 | 284 | diridx = PAGE_TO_PGTBL(vpg); 285 | pgidx = PAGE_TO_PGTBL_PG(vpg); 286 | 287 | if(!(pgdir[diridx] & PG_PRESENT)) { 288 | return -1; 289 | } 290 | pgtbl = PGTBL(diridx); 291 | 292 | if(!(pgtbl[pgidx] & PG_PRESENT)) { 293 | return -1; 294 | } 295 | pgaddr = pgtbl[pgidx] & PGENT_ADDR_MASK; 296 | return ADDR_TO_PAGE(pgaddr); 297 | } 298 | 299 | /* same as virt_to_phys, but uses the vm_page tree instead of the actual page table */ 300 | uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr) 301 | { 302 | int pg; 303 | uint32_t pgaddr; 304 | 305 | if((pg = virt_to_phys_page_proc(p, ADDR_TO_PAGE(vaddr))) == -1) { 306 | return 0; 307 | } 308 | pgaddr = PAGE_TO_ADDR(pg); 309 | 310 | return pgaddr | ADDR_TO_PGOFFS(vaddr); 311 | } 312 | 313 | /* same virt_to_phys_page, but uses the vm_page tree instead of the actual page table */ 314 | int virt_to_phys_page_proc(struct process *p, int vpg) 315 | { 316 | struct rbnode *node; 317 | assert(p); 318 | 319 | if(!(node = rb_findi(&p->vmmap, vpg))) { 320 | return -1; 321 | } 322 | return ((struct vm_page*)node->data)->ppage; 323 | } 324 | 325 | /* allocate a contiguous block of virtual memory pages along with 326 | * backing physical 
memory for them, and update the page table. 327 | */ 328 | int pgalloc(int num, int area) 329 | { 330 | int intr_state, ret = -1; 331 | struct page_range *node, *prev, dummy; 332 | 333 | intr_state = get_intr_state(); 334 | disable_intr(); 335 | 336 | dummy.next = pglist[area]; 337 | node = pglist[area]; 338 | prev = &dummy; 339 | 340 | while(node) { 341 | if(node->end - node->start >= num) { 342 | ret = node->start; 343 | node->start += num; 344 | 345 | if(node->start == node->end) { 346 | prev->next = node->next; 347 | node->next = 0; 348 | 349 | if(node == pglist[area]) { 350 | pglist[area] = 0; 351 | } 352 | free_node(node); 353 | } 354 | break; 355 | } 356 | 357 | prev = node; 358 | node = node->next; 359 | } 360 | 361 | if(ret >= 0) { 362 | /*unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : PG_GLOBAL;*/ 363 | unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : 0; 364 | 365 | /* allocate physical storage and map */ 366 | if(map_page_range(ret, num, -1, attr) == -1) { 367 | ret = -1; 368 | } 369 | } 370 | 371 | set_intr_state(intr_state); 372 | return ret; 373 | } 374 | 375 | int pgalloc_vrange(int start, int num) 376 | { 377 | struct page_range *node, *prev, dummy; 378 | int area, intr_state, ret = -1; 379 | 380 | area = (start >= ADDR_TO_PAGE(KMEM_START)) ? MEM_KERNEL : MEM_USER; 381 | if(area == MEM_USER && start + num > ADDR_TO_PAGE(KMEM_START)) { 382 | printf("pgalloc_vrange: invalid range request crossing user/kernel split\n"); 383 | return -1; 384 | } 385 | 386 | intr_state = get_intr_state(); 387 | disable_intr(); 388 | 389 | dummy.next = pglist[area]; 390 | node = pglist[area]; 391 | prev = &dummy; 392 | 393 | /* check to see if the requested VM range is available */ 394 | node = pglist[area]; 395 | while(node) { 396 | if(start >= node->start && start + num <= node->end) { 397 | ret = start; /* can do .. 
*/ 398 | 399 | if(start == node->start) { 400 | /* adjacent to the start of the range */ 401 | node->start += num; 402 | } else if(start + num == node->end) { 403 | /* adjacent to the end of the range */ 404 | node->end = start; 405 | } else { 406 | /* somewhere in the middle, which means we need 407 | * to allocate a new page_range 408 | */ 409 | struct page_range *newnode; 410 | 411 | if(!(newnode = alloc_node())) { 412 | panic("pgalloc_vrange failed to allocate new page_range while splitting a range in half... bummer\n"); 413 | } 414 | newnode->start = start + num; 415 | newnode->end = node->end; 416 | newnode->next = node->next; 417 | 418 | node->end = start; 419 | node->next = newnode; 420 | /* no need to check for null nodes at this point, there's 421 | * certainly stuff at the begining and the end, otherwise we 422 | * wouldn't be here. so break out of it. 423 | */ 424 | break; 425 | } 426 | 427 | if(node->start == node->end) { 428 | prev->next = node->next; 429 | node->next = 0; 430 | 431 | if(node == pglist[area]) { 432 | pglist[area] = 0; 433 | } 434 | free_node(node); 435 | } 436 | break; 437 | } 438 | 439 | prev = node; 440 | node = node->next; 441 | } 442 | 443 | if(ret >= 0) { 444 | /*unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : PG_GLOBAL;*/ 445 | unsigned int attr = (area == MEM_USER) ? (PG_USER | PG_WRITABLE) : 0; 446 | 447 | /* allocate physical storage and map */ 448 | if(map_page_range(ret, num, -1, attr) == -1) { 449 | ret = -1; 450 | } 451 | } 452 | 453 | set_intr_state(intr_state); 454 | return ret; 455 | } 456 | 457 | void pgfree(int start, int num) 458 | { 459 | int i, area, intr_state; 460 | struct page_range *node, *new, *prev, *next; 461 | 462 | intr_state = get_intr_state(); 463 | disable_intr(); 464 | 465 | for(i=0; istart = start; 476 | new->end = start + num; 477 | 478 | area = PAGE_TO_ADDR(start) >= KMEM_START ? 
MEM_KERNEL : MEM_USER; 479 | 480 | if(!pglist[area] || pglist[area]->start > start) { 481 | next = new->next = pglist[area]; 482 | pglist[area] = new; 483 | prev = 0; 484 | 485 | } else { 486 | 487 | prev = 0; 488 | node = pglist[area]; 489 | next = node ? node->next : 0; 490 | 491 | while(node) { 492 | if(!next || next->start > start) { 493 | /* place here, after node */ 494 | new->next = next; 495 | node->next = new; 496 | prev = node; /* needed by coalesce after the loop */ 497 | break; 498 | } 499 | 500 | prev = node; 501 | node = next; 502 | next = node ? node->next : 0; 503 | } 504 | } 505 | 506 | coalesce(prev, new, next); 507 | set_intr_state(intr_state); 508 | } 509 | 510 | static void coalesce(struct page_range *low, struct page_range *mid, struct page_range *high) 511 | { 512 | if(high) { 513 | if(mid->end == high->start) { 514 | mid->end = high->end; 515 | mid->next = high->next; 516 | free_node(high); 517 | } 518 | } 519 | 520 | if(low) { 521 | if(low->end == mid->start) { 522 | low->end += mid->end; 523 | low->next = mid->next; 524 | free_node(mid); 525 | } 526 | } 527 | } 528 | 529 | static void pgfault(int inum) 530 | { 531 | struct intr_frame *frm = get_intr_frame(); 532 | uint32_t fault_addr = get_fault_addr(); 533 | 534 | /* the fault occured in user space */ 535 | if(frm->err & PG_USER) { 536 | int fault_page = ADDR_TO_PAGE(fault_addr); 537 | struct process *proc = get_current_proc(); 538 | printf("DBG: page fault in user space (pid: %d)\n", proc->id); 539 | assert(proc); 540 | 541 | if(frm->err & PG_PRESENT) { 542 | /* it's not due to a missing page fetch the attributes */ 543 | int pgnum = ADDR_TO_PAGE(fault_addr); 544 | 545 | if((frm->err & PG_WRITABLE) && (get_page_bit(pgnum, PG_WRITABLE, 0) == 0)) { 546 | /* write permission fault might be a CoW fault or just an error 547 | * fetch the vm_page permissions to check if this is suppoosed to be 548 | * a writable page (which means we should CoW). 
549 | */ 550 | struct vm_page *page = get_vm_page_proc(proc, pgnum); 551 | 552 | if(page->flags & PG_WRITABLE) { 553 | /* ok this is a CoW fault */ 554 | if(copy_on_write(page) == -1) { 555 | panic("copy on write failed!"); 556 | } 557 | return; /* done, allow the process to restart the instruction and continue */ 558 | } else { 559 | /* TODO eventually we'll SIGSEGV the process, for now just panic. 560 | */ 561 | goto unhandled; 562 | } 563 | } 564 | goto unhandled; 565 | } 566 | 567 | /* so it's a missing page... ok */ 568 | 569 | /* detect if it's an automatic stack growth deal */ 570 | if(fault_page < proc->user_stack_pg && proc->user_stack_pg - fault_page < USTACK_MAXGROW) { 571 | int num_pages = proc->user_stack_pg - fault_page; 572 | printf("growing user (%d) stack by %d pages\n", proc->id, num_pages); 573 | 574 | if(pgalloc_vrange(fault_page, num_pages) != fault_page) { 575 | printf("failed to allocate VM for stack growth\n"); 576 | /* TODO: in the future we'd SIGSEGV the process here, for now just panic */ 577 | goto unhandled; 578 | } 579 | proc->user_stack_pg = fault_page; 580 | return; 581 | } 582 | 583 | /* it's not a stack growth fault. since we don't do swapping yet, just 584 | * fall to unhandled and panic 585 | */ 586 | } 587 | 588 | unhandled: 589 | printf("~~~~ PAGE FAULT ~~~~\n"); 590 | printf("fault address: %x\n", fault_addr); 591 | printf("error code: %x\n", frm->err); 592 | 593 | if(frm->err & PG_PRESENT) { 594 | if(frm->err & 8) { 595 | printf("reserved bit set in some paging structure\n"); 596 | } else { 597 | printf("%s protection violation ", (frm->err & PG_WRITABLE) ? "WRITE" : "READ"); 598 | printf("in %s mode\n", (frm->err & PG_USER) ? 
"user" : "kernel"); 599 | } 600 | } else { 601 | printf("page not present\n"); 602 | } 603 | 604 | panic("unhandled page fault\n"); 605 | } 606 | 607 | /* copy-on-write handler, called from pgfault above */ 608 | static int copy_on_write(struct vm_page *page) 609 | { 610 | int tmpvpg; 611 | struct vm_page *newpage; 612 | struct rbnode *vmnode; 613 | struct process *p = get_current_proc(); 614 | 615 | assert(page->nref > 0); 616 | 617 | /* first of all check the refcount. If it's 1 then we don't need to copy 618 | * anything. This will happen when all forked processes except one have 619 | * marked this read-write again after faulting. 620 | */ 621 | if(page->nref == 1) { 622 | set_page_bit(page->vpage, PG_WRITABLE, PAGE_ONLY); 623 | return 0; 624 | } 625 | 626 | /* ok let's make a copy and mark it read-write */ 627 | if(!(newpage = malloc(sizeof *newpage))) { 628 | printf("copy_on_write: failed to allocate new vm_page\n"); 629 | return -1; 630 | } 631 | newpage->vpage = page->vpage; 632 | newpage->flags = page->flags; 633 | 634 | if(!(tmpvpg = pgalloc(1, MEM_KERNEL))) { 635 | printf("copy_on_write: failed to allocate physical page\n"); 636 | /* XXX proper action: SIGSEGV */ 637 | return -1; 638 | } 639 | newpage->ppage = virt_to_phys_page(tmpvpg); 640 | newpage->nref = 1; 641 | 642 | /* do the copy */ 643 | memcpy((void*)PAGE_TO_ADDR(tmpvpg), (void*)PAGE_TO_ADDR(page->vpage), PGSIZE); 644 | unmap_page(tmpvpg); 645 | pgfree(tmpvpg, 1); 646 | 647 | /* set the new vm_page in the process vmmap */ 648 | vmnode = rb_findi(&p->vmmap, newpage->vpage); 649 | assert(vmnode && vmnode->data == page); /* shouldn't be able to fail */ 650 | vmnode->data = newpage; 651 | 652 | /* also update tha page table */ 653 | map_page(newpage->vpage, newpage->ppage, newpage->flags); 654 | 655 | /* finally decrease the refcount at the original vm_page struct */ 656 | page->nref--; 657 | return 0; 658 | } 659 | 660 | /* --- page range list node management --- */ 661 | #define NODES_IN_PAGE 
(PGSIZE / sizeof(struct page_range)) 662 | 663 | static struct page_range *alloc_node(void) 664 | { 665 | struct page_range *node; 666 | int pg, i; 667 | 668 | if(node_pool) { 669 | node = node_pool; 670 | node_pool = node_pool->next; 671 | /*printf("alloc_node -> %x\n", (unsigned int)node);*/ 672 | return node; 673 | } 674 | 675 | /* no node structures in the pool, we need to allocate a new page, 676 | * split it up into node structures, add them in the pool, and 677 | * allocate one of them. 678 | */ 679 | if(!(pg = pgalloc(1, MEM_KERNEL))) { 680 | panic("ran out of physical memory while allocating VM range structures\n"); 681 | } 682 | node_pool = (struct page_range*)PAGE_TO_ADDR(pg); 683 | 684 | /* link them up, skip the first as we'll just allocate it anyway */ 685 | for(i=2; i %x\n", (unsigned int)node);*/ 693 | return node; 694 | } 695 | 696 | static void free_node(struct page_range *node) 697 | { 698 | node->next = node_pool; 699 | node_pool = node; 700 | /*printf("free_node\n");*/ 701 | } 702 | 703 | /* clone_vm makes a copy of the current page tables, thus duplicating the 704 | * virtual address space. 705 | * 706 | * For the kernel part of the address space (last 256 page directory entries) 707 | * we don't want to diplicate the page tables, just point all page directory 708 | * entries to the same set of page tables. 709 | * 710 | * If "cow" is non-zero it also marks the shared user-space pages as 711 | * read-only, to implement copy-on-write. 
712 | */ 713 | void clone_vm(struct process *pdest, struct process *psrc, int cow) 714 | { 715 | int i, j, dirpg, tblpg, kstart_dirent; 716 | uint32_t paddr; 717 | uint32_t *ndir, *ntbl; 718 | struct rbnode *vmnode; 719 | 720 | /* allocate the new page directory */ 721 | if((dirpg = pgalloc(1, MEM_KERNEL)) == -1) { 722 | panic("clone_vmem: failed to allocate page directory page\n"); 723 | } 724 | ndir = (uint32_t*)PAGE_TO_ADDR(dirpg); 725 | 726 | /* allocate a virtual page for temporarily mapping all new 727 | * page tables while we populate them. 728 | */ 729 | if((tblpg = pgalloc(1, MEM_KERNEL)) == -1) { 730 | panic("clone_vmem: failed to allocate page table page\n"); 731 | } 732 | ntbl = (uint32_t*)PAGE_TO_ADDR(tblpg); 733 | 734 | /* we will allocate physical pages and map them to this virtual page 735 | * as needed in the loop below. we don't need the physical page allocated 736 | * by pgalloc. 737 | */ 738 | free_phys_page(virt_to_phys((uint32_t)ntbl)); 739 | 740 | kstart_dirent = ADDR_TO_PAGE(KMEM_START) / 1024; 741 | 742 | /* user space */ 743 | for(i=0; ivmmap, RB_KEY_INT); 775 | rb_begin(&psrc->vmmap); 776 | while((vmnode = rb_next(&psrc->vmmap))) { 777 | struct vm_page *pg = vmnode->data; 778 | pg->nref++; 779 | 780 | /* insert the same vm_page to the new tree */ 781 | rb_inserti(&pdest->vmmap, pg->vpage, pg); 782 | } 783 | 784 | /* for the kernel space we'll just use the same page tables */ 785 | for(i=kstart_dirent; i<1023; i++) { 786 | ndir[i] = pgdir[i]; 787 | } 788 | 789 | /* also point the last page directory entry to the page directory address 790 | * since we're relying on recursive page tables 791 | */ 792 | paddr = virt_to_phys((uint32_t)ndir); 793 | ndir[1023] = paddr | PG_PRESENT; 794 | 795 | if(cow) { 796 | /* we just changed all the page protection bits, so we need to flush the TLB */ 797 | flush_tlb(); 798 | } 799 | 800 | /* unmap before freeing the virtual pages, to avoid deallocating the physical pages */ 801 | unmap_page(dirpg); 802 | 
unmap_page(tblpg); 803 | 804 | pgfree(dirpg, 1); 805 | pgfree(tblpg, 1); 806 | 807 | /* set the new page directory pointer */ 808 | pdest->ctx.pgtbl_paddr = paddr; 809 | } 810 | 811 | /* cleanup_vm called by exit to clean up any memory used by the process */ 812 | void cleanup_vm(struct process *p) 813 | { 814 | struct rbnode *vmnode; 815 | 816 | /* go through the vm map and reduce refcounts all around 817 | * when a ref goes to 0, free the physical page 818 | */ 819 | rb_begin(&p->vmmap); 820 | while((vmnode = rb_next(&p->vmmap))) { 821 | struct vm_page *page = vmnode->data; 822 | 823 | /* skip kernel pages obviously */ 824 | if(!(page->flags & PG_USER)) { 825 | continue; 826 | } 827 | 828 | if(--page->nref <= 0) { 829 | /* free the physical page if nref goes to 0 */ 830 | free_phys_page(PAGE_TO_ADDR(page->ppage)); 831 | } 832 | } 833 | 834 | /* destroying the tree will free the nodes */ 835 | rb_destroy(&p->vmmap); 836 | } 837 | 838 | 839 | int get_page_bit(int pgnum, uint32_t bit, int wholepath) 840 | { 841 | int tidx = PAGE_TO_PGTBL(pgnum); 842 | int tent = PAGE_TO_PGTBL_PG(pgnum); 843 | uint32_t *pgtbl = PGTBL(tidx); 844 | 845 | if(wholepath) { 846 | if((pgdir[tidx] & bit) == 0) { 847 | return 0; 848 | } 849 | } 850 | 851 | return pgtbl[tent] & bit; 852 | } 853 | 854 | void set_page_bit(int pgnum, uint32_t bit, int wholepath) 855 | { 856 | int tidx = PAGE_TO_PGTBL(pgnum); 857 | int tent = PAGE_TO_PGTBL_PG(pgnum); 858 | uint32_t *pgtbl = PGTBL(tidx); 859 | 860 | if(wholepath) { 861 | pgdir[tidx] |= bit; 862 | } 863 | pgtbl[tent] |= bit; 864 | 865 | flush_tlb_page(pgnum); 866 | } 867 | 868 | void clear_page_bit(int pgnum, uint32_t bit, int wholepath) 869 | { 870 | int tidx = PAGE_TO_PGTBL(pgnum); 871 | int tent = PAGE_TO_PGTBL_PG(pgnum); 872 | uint32_t *pgtbl = PGTBL(tidx); 873 | 874 | if(wholepath) { 875 | pgdir[tidx] &= ~bit; 876 | } 877 | 878 | pgtbl[tent] &= ~bit; 879 | 880 | flush_tlb_page(pgnum); 881 | } 882 | 883 | 884 | #define USER_PGDIR_ENTRIES 
PAGE_TO_PGTBL(KMEM_START_PAGE) 885 | int cons_vmmap(struct rbtree *vmmap) 886 | { 887 | int i, j; 888 | 889 | rb_init(vmmap, RB_KEY_INT); 890 | 891 | for(i=0; ivpage = i * 1024 + j; 904 | vmp->ppage = ADDR_TO_PAGE(pgtbl[j] & PGENT_ADDR_MASK); 905 | vmp->flags = pgtbl[j] & ATTR_PGTBL_MASK; 906 | vmp->nref = 1; /* when first created assume no sharing */ 907 | 908 | rb_inserti(vmmap, vmp->vpage, vmp); 909 | } 910 | } 911 | } 912 | } 913 | 914 | return 0; 915 | } 916 | 917 | struct vm_page *get_vm_page(int vpg) 918 | { 919 | return get_vm_page_proc(get_current_proc(), vpg); 920 | } 921 | 922 | struct vm_page *get_vm_page_proc(struct process *p, int vpg) 923 | { 924 | struct rbnode *node; 925 | 926 | if(!p || !(node = rb_findi(&p->vmmap, vpg))) { 927 | return 0; 928 | } 929 | return node->data; 930 | } 931 | 932 | 933 | void dbg_print_vm(int area) 934 | { 935 | struct page_range *node; 936 | int last, intr_state; 937 | 938 | intr_state = get_intr_state(); 939 | disable_intr(); 940 | 941 | node = pglist[area]; 942 | last = area == MEM_USER ? 0 : ADDR_TO_PAGE(KMEM_START); 943 | 944 | printf("%s vm space\n", area == MEM_USER ? 
"user" : "kernel"); 945 | 946 | while(node) { 947 | if(node->start > last) { 948 | printf(" vm-used: %x -> %x\n", PAGE_TO_ADDR(last), PAGE_TO_ADDR(node->start)); 949 | } 950 | 951 | printf(" vm-free: %x -> ", PAGE_TO_ADDR(node->start)); 952 | if(node->end >= PAGE_COUNT) { 953 | printf("END\n"); 954 | } else { 955 | printf("%x\n", PAGE_TO_ADDR(node->end)); 956 | } 957 | 958 | last = node->end; 959 | node = node->next; 960 | } 961 | 962 | set_intr_state(intr_state); 963 | } 964 | -------------------------------------------------------------------------------- /src/vm.h: -------------------------------------------------------------------------------- 1 | #ifndef VM_H_ 2 | #define VM_H_ 3 | 4 | #include 5 | #include "mboot.h" 6 | #include "rbtree.h" 7 | 8 | #define KMEM_START 0xc0000000 9 | #define KMEM_START_PAGE ADDR_TO_PAGE(KMEM_START) 10 | 11 | /* page mapping flags */ 12 | #define PG_PRESENT (1 << 0) 13 | #define PG_WRITABLE (1 << 1) 14 | #define PG_USER (1 << 2) 15 | #define PG_WRITE_THROUGH (1 << 3) 16 | #define PG_NOCACHE (1 << 4) 17 | #define PG_ACCESSED (1 << 5) 18 | #define PG_DIRTY (1 << 6) 19 | #define PG_TYPE (1 << 7) 20 | /* PG_GLOBAL mappings won't flush from TLB */ 21 | #define PG_GLOBAL (1 << 8) 22 | 23 | 24 | #define PGSIZE 4096 25 | #define PAGE_COUNT (1024 * 1024) 26 | 27 | #define PGOFFS_MASK 0xfff 28 | #define PGNUM_MASK 0xfffff000 29 | #define PGENT_ADDR_MASK PGNUM_MASK 30 | 31 | #define ADDR_TO_PAGE(x) ((uint32_t)(x) >> 12) 32 | #define PAGE_TO_ADDR(x) ((uint32_t)(x) << 12) 33 | 34 | #define ADDR_TO_PGTBL(x) ((uint32_t)(x) >> 22) 35 | #define ADDR_TO_PGTBL_PG(x) (((uint32_t)(x) >> 12) & 0x3ff) 36 | #define ADDR_TO_PGOFFS(x) ((uint32_t)(x) & PGOFFS_MASK) 37 | 38 | #define PAGE_TO_PGTBL(x) ((uint32_t)(x) >> 10) 39 | #define PAGE_TO_PGTBL_PG(x) ((uint32_t)(x) & 0x3ff) 40 | 41 | /* argument to clone_vm */ 42 | #define CLONE_SHARED 0 43 | #define CLONE_COW 1 44 | 45 | /* last argument to *_page_bit */ 46 | #define PAGE_ONLY 0 47 | #define WHOLE_PATH 
1

/* Per-process record of one mapped page, kept in the process vmmap tree
 * keyed by vpage. nref counts how many process vm maps share the record:
 * it starts at 1, clone_vm increases it, and cleanup_vm / copy-on-write
 * decrease it.
 */
struct vm_page {
	int vpage, ppage;	/* virtual and physical page numbers */
	unsigned int flags;	/* PG_* attribute bits */

	int nref;
};

struct process;

void init_vm(void);

int map_page(int vpage, int ppage, unsigned int attr);
/* returns 0, or -1 (after printing a message) if vpage was not mapped */
int unmap_page(int vpage);
/* if ppg_start is -1, physical pages are allocated for the mapping */
int map_page_range(int vpg_start, int pgcount, int ppg_start, unsigned int attr);
int unmap_page_range(int vpg_start, int pgcount);
int map_mem_range(uint32_t vaddr, size_t sz, uint32_t paddr, unsigned int attr);

/* translate through the current page table;
 * an unmapped page yields 0 (address) or -1 (page number)
 */
uint32_t virt_to_phys(uint32_t vaddr);
int virt_to_phys_page(int vpg);

/* same translations, but through the vm_page tree of process p */
uint32_t virt_to_phys_proc(struct process *p, uint32_t vaddr);
int virt_to_phys_page_proc(struct process *p, int vpg);

/* selects one of the two free virtual page range lists */
enum {
	MEM_KERNEL,
	MEM_USER
};

/* allocate num mapped pages from an area; returns the first page or -1 */
int pgalloc(int num, int area);
/* allocate exactly the pages [start, start + num); returns start or -1 */
int pgalloc_vrange(int start, int num);
void pgfree(int start, int num);

/* don't be fooled by the fact these two accept process arguments
 * they in fact work only for the "current" process (psrc and p)
 */
void clone_vm(struct process *pdest, struct process *psrc, int cow);
void cleanup_vm(struct process *p);

/* test / set / clear attribute bits of a page; wholepath is PAGE_ONLY to
 * touch only the page table entry, or WHOLE_PATH to include the page
 * directory entry as well
 */
int get_page_bit(int pgnum, uint32_t bit, int wholepath);
void set_page_bit(int pgnum, uint32_t bit, int wholepath);
void clear_page_bit(int pgnum, uint32_t bit, int wholepath);

/* construct the vm map for the current user mappings */
int cons_vmmap(struct rbtree *vmmap);

/* look up the vm_page of a virtual page in the current process (get_vm_page)
 * or in process p; both return null if there is none
 */
struct vm_page *get_vm_page(int vpg);
struct vm_page *get_vm_page_proc(struct process *p, int vpg);

void dbg_print_vm(int area);

/* defined in vm-asm.S */
void set_pgdir_addr(uint32_t addr);
uint32_t get_pgdir_addr(void);

#endif	/* VM_H_ */

--------------------------------------------------------------------------------