// Do runs f and, if f panics, hands the recovered value to h instead of
// letting the panic propagate to Do's caller. When f returns normally h is
// never invoked.
func Do(f func(), h func(r interface{})) {

	// recover only has an effect when called directly by the deferred function.
	defer func() {
		r := recover()
		if r == nil {
			return
		}
		h(r)
	}()

	f()

}
specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | 16 | # publish folder after running make.sh 17 | publish/ 18 | -------------------------------------------------------------------------------- /USAGE.md: -------------------------------------------------------------------------------- 1 | ## Usage examples 2 | 3 | Ingest your disk collection, so diskm8 can report on them: 4 | 5 | ``` 6 | diskm8 -ingest C:\Users\myname\LotsOfDisks 7 | ``` 8 | 9 | Find Whole Disk duplicates: 10 | 11 | ``` 12 | diskm8 -whole-dupes 13 | ``` 14 | 15 | Find Active Sectors duplicates (inactive sectors can be different): 16 | 17 | ``` 18 | diskm8 -as-dupes 19 | ``` 20 | 21 | Find Duplicate files across disks: 22 | 23 | ``` 24 | diskm8 -file-dupes 25 | ``` 26 | 27 | Find Active Sector duplicates but only under a folder: 28 | 29 | ``` 30 | diskm8 -as-dupes -select "C:\Users\myname\LotsOfDisks\Operating Systems" 31 | ``` 32 | 33 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/chzyer/logex v1.2.1 h1:XHDu3E6q+gdHgsdTPH6ImJMIp436vR6MPtH8gP05QzM= 2 | github.com/chzyer/logex v1.2.1/go.mod h1:JLbx6lG2kDbNRFnfkgvh4eRJRPX1QCoOIWomwysCBrQ= 3 | github.com/chzyer/readline v1.5.1 h1:upd/6fQk4src78LMRzh5vItIt361/o4uq553V8B5sGI= 4 | github.com/chzyer/readline v1.5.1/go.mod h1:Eh+b79XXUwfKfcPLepksvw2tcLE/Ct21YObkaSkeBlk= 5 | github.com/chzyer/test v1.0.0 h1:p3BQDXSxOhOG0P9z6/hGnII4LGiEPOYBhs8asl/fC04= 6 | github.com/chzyer/test v1.0.0/go.mod h1:2JlltgoNkt4TW/z9V/IzDdFaMTM2JPIi26O1pF38GC8= 7 | golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5 h1:y/woIyUBFbpQGKS0u1aHF/40WUDnek3fPOyD08H5Vng= 8 | golang.org/x/sys v0.0.0-20220310020820-b874c991c1a5/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 9 | 
-------------------------------------------------------------------------------- /disk/int.go: -------------------------------------------------------------------------------- 1 | package disk 2 | 3 | import "time" 4 | 5 | type CatalogEntryType int 6 | 7 | const ( 8 | CETUnknown CatalogEntryType = iota 9 | CETBinary 10 | CETBasicApplesoft 11 | CETBasicInteger 12 | CETPascal 13 | CETText 14 | CETData 15 | CETGraphics 16 | ) 17 | 18 | type CatalogEntry interface { 19 | Size() int // file size in bytes 20 | Name() string 21 | NameUnadorned() string 22 | Date() time.Time 23 | Type() CatalogEntryType 24 | } 25 | 26 | type DiskImage interface { 27 | IsValid() (bool, DiskFormat, SectorOrder) 28 | GetCatalog(path string, pattern string) ([]CatalogEntry, error) 29 | ReadFile(fd CatalogEntry) (int, []byte, error) 30 | StoreFile(fd CatalogEntry) error 31 | GetUsedBitmap() ([]bool, error) 32 | Nibblize() ([]byte, error) 33 | } 34 | -------------------------------------------------------------------------------- /make.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ARCHES="darwin-amd64 darwin-arm64 windows-386 windows-amd64 windows-arm64 linux-386 linux-amd64 linux-arm freebsd-arm freebsd-amd64 freebsd-386" 4 | PUBLISH="publish" 5 | 6 | mkdir -p "$PUBLISH" 7 | 8 | exitState=0 9 | for arch in `echo $ARCHES`; do 10 | export GOOS=`echo $arch | awk -F"-" '{print $1}'` 11 | export GOARCH=`echo $arch | awk -F"-" '{print $2}'` 12 | EXENAME="diskm8" 13 | ZIPNAME="$PUBLISH/diskm8-$GOOS-$GOARCH.zip" 14 | if [ "$GOOS" == "windows" ]; then 15 | EXENAME="$EXENAME.exe" 16 | fi 17 | echo "Building $EXENAME..." 18 | go build -o "$EXENAME" . 19 | if [ "$?" 
== "0" ]; then 20 | echo "Zipping -> $ZIPNAME" 21 | zip "$ZIPNAME" "$EXENAME" "LICENSE" "README.md" "USAGE.md" 22 | else 23 | exit 2 24 | fi 25 | done 26 | -------------------------------------------------------------------------------- /disk/atokens_test.go: -------------------------------------------------------------------------------- 1 | package disk 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | ) 7 | 8 | func TestHGR2Tokenise(t *testing.T) { 9 | 10 | lines := []string{ 11 | "10 HGR2 : REM SOMETHING", 12 | "20 REM SOMETHING ELSE", 13 | } 14 | 15 | a := ApplesoftTokenize(lines) 16 | 17 | s := string(ApplesoftDetoks(a)) 18 | 19 | t.Logf("code: %s", s) 20 | 21 | if !strings.Contains(s, "HGR2 ") { 22 | t.Fatalf("Expected HGR2") 23 | } 24 | 25 | } 26 | 27 | func TestHGRTokenise(t *testing.T) { 28 | 29 | lines := []string{ 30 | "10 HGR : REM SOMETHING", 31 | "20 REM SOMETHING ELSE", 32 | } 33 | 34 | a := ApplesoftTokenize(lines) 35 | 36 | s := string(ApplesoftDetoks(a)) 37 | 38 | t.Logf("code: %s", s) 39 | 40 | if !strings.Contains(s, "HGR ") { 41 | t.Fatalf("Expected HGR") 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /banner.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/base64" 5 | "os" 6 | ) 7 | 8 | var text = 
`ICBfXyAgICAgICAgICAgICAgIF9fICAgICAgICAgICAgICAgICAgX18gICAgX18gICAgIAogL1wgXCAgX18gICAgICAgICAvXCBcICAgICAgLydcXy9gXCAgLydfIGBcIC9cIFwgICAgCiBcX1wgXC9cX1wgICAgX19fX1wgXCBcLydcIC9cICAgICAgXC9cIFxMXCBcXCBcIFwgICAKIC8nX2AgXC9cIFwgIC8nLF9fXFwgXCAsIDwgXCBcIFxfX1wgXC9fPiBfIDxfXCBcIFwgIAovXCBcTFwgXCBcIFwvXF9fLCBgXFwgXCBcXGBcXCBcIFxfL1wgXC9cIFxMXCBcXCBcX1wgClwgXF9fXyxfXCBcX1wvXF9fX18vIFwgXF9cIFxfXCBcX1xcIFxfXCBcX19fXy8gXC9cX1wKIFwvX18sXyAvXC9fL1wvX19fLyAgIFwvXy9cL18vXC9fLyBcL18vXC9fX18vICAgXC9fLwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAK` 9 | 10 | func banner() { 11 | 12 | t, _ := base64.StdEncoding.DecodeString(text) 13 | 14 | os.Stderr.WriteString(string(t) + "\r\n") 15 | os.Stderr.WriteString("(c) 2015 - 2024 Paleotronic.com\n\n") 16 | os.Stderr.WriteString("type 'help' to see commands\n\n") 17 | 18 | } 19 | -------------------------------------------------------------------------------- /drvgeneric.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | 7 | "github.com/paleotronic/diskm8/disk" 8 | "github.com/paleotronic/diskm8/loggy" 9 | ) 10 | 11 | func analyzeNONE(id int, dsk *disk.DSKWrapper, info *Disk) { 12 | 13 | l := loggy.Get(id) 14 | 15 | // Sector bitmap 16 | switch len(dsk.Data) { 17 | case disk.STD_DISK_BYTES: 18 | info.Tracks = 35 19 | info.Sectors = 16 20 | case disk.STD_DISK_BYTES_OLD: 21 | info.Tracks = 35 22 | info.Sectors = 13 23 | case disk.PRODOS_800KB_DISK_BYTES: 24 | info.Tracks = disk.GetDiskFormat(disk.DF_PRODOS_800KB).TPD() 25 | info.Sectors = disk.GetDiskFormat(disk.DF_PRODOS_800KB).SPT() 26 | default: 27 | l.Errorf("Unknown size %d bytes", len(dsk.Data)) 28 | } 29 | 30 | l.Logf("Tracks: %d, Sectors: %d", info.Tracks, info.Sectors) 31 | 32 | l.Logf("Reading sector bitmap and SHA256'ing sectors") 33 | 34 | l.Logf("Assuming all sectors 
might be used") 35 | info.Bitmap = make([]bool, info.Tracks*info.Sectors) 36 | for i := range info.Bitmap { 37 | info.Bitmap[i] = true 38 | } 39 | 40 | info.ActiveSectors = make(DiskSectors, 0) 41 | 42 | activeData := make([]byte, 0) 43 | 44 | for t := 0; t < info.Tracks; t++ { 45 | 46 | for s := 0; s < info.Sectors; s++ { 47 | 48 | if info.Bitmap[t*info.Sectors+s] { 49 | sector := &DiskSector{ 50 | Track: t, 51 | Sector: s, 52 | SHA256: dsk.ChecksumSector(t, s), 53 | } 54 | 55 | data := dsk.Read() 56 | activeData = append(activeData, data...) 57 | 58 | if *ingestMode&2 == 2 { 59 | sector.Data = data 60 | } 61 | 62 | info.ActiveSectors = append(info.ActiveSectors, sector) 63 | } 64 | } 65 | 66 | } 67 | 68 | sum := sha256.Sum256(activeData) 69 | info.SHA256Active = hex.EncodeToString(sum[:]) 70 | 71 | info.LogBitmap(id) 72 | 73 | // Analyzing files 74 | l.Log("Skipping Analysis of files") 75 | 76 | exists := exists(*baseName + "/" + info.GetFilename()) 77 | 78 | if !exists || *forceIngest { 79 | info.WriteToFile(*baseName + "/" + info.GetFilename()) 80 | } else { 81 | l.Log("Not writing as it already exists") 82 | } 83 | 84 | out(dsk.Format) 85 | 86 | } 87 | -------------------------------------------------------------------------------- /drvprodos800.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | 7 | "github.com/paleotronic/diskm8/disk" 8 | "github.com/paleotronic/diskm8/loggy" 9 | ) 10 | 11 | func analyzePRODOS800(id int, dsk *disk.DSKWrapper, info *Disk) { 12 | 13 | l := loggy.Get(id) 14 | 15 | // Sector bitmap 16 | l.Logf("Reading Disk VTOC...") 17 | vtoc, err := dsk.PRODOS800GetVDH(2) 18 | if err != nil { 19 | l.Errorf("Error reading VTOC: %s", err.Error()) 20 | return 21 | } 22 | 23 | info.Blocks = vtoc.GetTotalBlocks() 24 | l.Logf("Blocks: %d", info.Blocks) 25 | 26 | l.Logf("Reading sector bitmap and SHA256'ing sectors") 27 | 28 | info.Bitmap = 
make([]bool, info.Blocks) 29 | 30 | info.ActiveSectors = make(DiskSectors, 0) 31 | 32 | activeData := make([]byte, 0) 33 | 34 | vbitmap, err := dsk.PRODOS800GetVolumeBitmap() 35 | if err != nil { 36 | l.Errorf("Error reading volume bitmap: %s", err.Error()) 37 | return 38 | } 39 | 40 | l.Debug(vbitmap) 41 | 42 | for b := 0; b < info.Blocks; b++ { 43 | info.Bitmap[b] = !vbitmap.IsBlockFree(b) 44 | 45 | if info.Bitmap[b] { 46 | 47 | data, _ := dsk.PRODOS800GetBlock(b) 48 | 49 | t, s1, s2 := dsk.PRODOS800GetBlockSectors(b) 50 | 51 | sec1 := &DiskSector{ 52 | Track: t, 53 | Sector: s1, 54 | SHA256: dsk.ChecksumSector(t, s1), 55 | } 56 | 57 | sec2 := &DiskSector{ 58 | Track: t, 59 | Sector: s2, 60 | SHA256: dsk.ChecksumSector(t, s2), 61 | } 62 | 63 | if *ingestMode&2 == 2 { 64 | sec1.Data = data[:256] 65 | sec2.Data = data[256:] 66 | } 67 | 68 | info.ActiveSectors = append(info.ActiveSectors, sec1, sec2) 69 | 70 | activeData = append(activeData, data...) 71 | 72 | } else { 73 | 74 | data, _ := dsk.PRODOS800GetBlock(b) 75 | 76 | t, s1, s2 := dsk.PRODOS800GetBlockSectors(b) 77 | 78 | sec1 := &DiskSector{ 79 | Track: t, 80 | Sector: s1, 81 | SHA256: dsk.ChecksumSector(t, s1), 82 | } 83 | 84 | sec2 := &DiskSector{ 85 | Track: t, 86 | Sector: s2, 87 | SHA256: dsk.ChecksumSector(t, s2), 88 | } 89 | 90 | if *ingestMode&2 == 2 { 91 | sec1.Data = data[:256] 92 | sec2.Data = data[256:] 93 | } 94 | 95 | info.InactiveSectors = append(info.InactiveSectors, sec1, sec2) 96 | 97 | //activeData = append(activeData, data...) 
98 | 99 | } 100 | 101 | } 102 | 103 | sum := sha256.Sum256(activeData) 104 | info.SHA256Active = hex.EncodeToString(sum[:]) 105 | 106 | info.LogBitmap(id) 107 | 108 | // // Analyzing files 109 | l.Log("Starting Analysis of files") 110 | 111 | info.Files = make([]*DiskFile, 0) 112 | prodosDir(id, 2, "", dsk, info) 113 | 114 | exists := exists(*baseName + "/" + info.GetFilename()) 115 | 116 | if !exists || *forceIngest { 117 | info.WriteToFile(*baseName + "/" + info.GetFilename()) 118 | } else { 119 | l.Log("Not writing as it already exists") 120 | } 121 | 122 | out(dsk.Format) 123 | 124 | } 125 | -------------------------------------------------------------------------------- /loggy/logger.go: -------------------------------------------------------------------------------- 1 | package loggy 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strings" 7 | "time" 8 | ) 9 | 10 | var logFile *os.File 11 | var ECHO bool = false 12 | var SILENT bool = false 13 | var LogFolder string = "./logs/" 14 | 15 | type Logger struct { 16 | logFile *os.File 17 | id int 18 | app string 19 | } 20 | 21 | var loggers map[int]*Logger 22 | var app string 23 | 24 | func Get(id int) *Logger { 25 | if loggers == nil { 26 | loggers = make(map[int]*Logger) 27 | } 28 | l, ok := loggers[id] 29 | if !ok { 30 | l = NewLogger(id, app) 31 | loggers[id] = l 32 | } 33 | return l 34 | } 35 | 36 | func NewLogger(id int, app string) *Logger { 37 | 38 | if app == "" { 39 | app = "diskm8" 40 | } 41 | 42 | filename := fmt.Sprintf("%s_%d_%s.log", app, id, fts()) 43 | os.MkdirAll(LogFolder, 0755) 44 | 45 | logFile, _ = os.Create(LogFolder + filename) 46 | l := &Logger{ 47 | id: id, 48 | logFile: logFile, 49 | app: app, 50 | } 51 | 52 | return l 53 | } 54 | 55 | func ts() string { 56 | t := time.Now() 57 | return fmt.Sprintf( 58 | "%.4d/%.2d/%.2d %.2d:%.2d:%.2d", 59 | t.Year(), t.Month(), t.Day(), 60 | t.Hour(), t.Minute(), t.Second(), 61 | ) 62 | } 63 | 64 | func fts() string { 65 | t := time.Now() 66 | return 
fmt.Sprintf( 67 | "%.4d%.2d%.2d%.2d%.2d%.2d", 68 | t.Year(), t.Month(), t.Day(), 69 | t.Hour(), t.Minute(), t.Second(), 70 | ) 71 | } 72 | 73 | func (l *Logger) llogf(format string, designator string, v ...interface{}) { 74 | 75 | format = ts() + " " + designator + " :: " + format 76 | 77 | if !strings.HasSuffix(format, "\n") { 78 | format += "\n" 79 | } 80 | 81 | l.logFile.WriteString(fmt.Sprintf(format, v...)) 82 | l.logFile.Sync() 83 | 84 | if ECHO { 85 | os.Stderr.WriteString(fmt.Sprintf(format, v...)) 86 | } 87 | 88 | } 89 | 90 | func (l *Logger) llog(designator string, v ...interface{}) { 91 | 92 | format := ts() + " " + designator + " :: " 93 | for _, vv := range v { 94 | format += fmt.Sprintf("%v ", vv) 95 | } 96 | if !strings.HasSuffix(format, "\n") { 97 | format += "\n" 98 | } 99 | 100 | l.logFile.WriteString(format) 101 | l.logFile.Sync() 102 | 103 | if ECHO { 104 | os.Stderr.WriteString(format) 105 | } 106 | } 107 | 108 | func (l *Logger) Logf(format string, v ...interface{}) { 109 | l.llogf(format, "INFO ", v...) 110 | } 111 | 112 | func (l *Logger) Log(v ...interface{}) { 113 | l.llog("INFO ", v...) 114 | } 115 | 116 | func (l *Logger) Errorf(format string, v ...interface{}) { 117 | l.llogf(format, "ERROR", v...) 118 | } 119 | 120 | func (l *Logger) Error(v ...interface{}) { 121 | l.llog("ERROR", v...) 122 | } 123 | 124 | func (l *Logger) Debugf(format string, v ...interface{}) { 125 | l.llogf(format, "DEBUG", v...) 126 | } 127 | 128 | func (l *Logger) Debug(v ...interface{}) { 129 | l.llog("DEBUG", v...) 130 | } 131 | 132 | func (l *Logger) Fatalf(format string, v ...interface{}) { 133 | l.llogf(format, "FATAL", v...) 134 | } 135 | 136 | func (l *Logger) Fatal(v ...interface{}) { 137 | l.llog("FATAL", v...) 
// Header2MG holds the raw 64-byte preamble of a 2MG (2IMG) disk image and
// decodes its fields on demand. All multi-byte fields are little-endian.
type Header2MG struct {
	Data [64]byte
}

// SetData copies at most the first 64 bytes of data into the header. When
// data is shorter than 64 bytes the remaining header bytes are left unchanged.
func (h *Header2MG) SetData(data []byte) {
	copy(h.Data[:], data)
}

// word decodes the 16-bit little-endian value stored at offset off.
func (h *Header2MG) word(off int) int {
	return int(h.Data[off]) | int(h.Data[off+1])<<8
}

// long decodes the 32-bit little-endian value stored at offset off.
//
// The previous per-field arithmetic scaled the third byte by 65336 instead of
// 65536 (2^16), so any field with a non-zero third byte decoded incorrectly.
func (h *Header2MG) long(off int) int {
	return h.word(off) | h.word(off+2)<<16
}

// GetID returns the four bytes at offset 0x00 as a string ("2IMG" for a
// valid image, see MAGIC_2MG).
func (h *Header2MG) GetID() string {
	return string(h.Data[0x00:0x04])
}

// GetCreatorID returns the four bytes at offset 0x04 as a string.
func (h *Header2MG) GetCreatorID() string {
	return string(h.Data[0x04:0x08])
}

// GetHeaderSize returns the 16-bit value at offset 0x08.
func (h *Header2MG) GetHeaderSize() int {
	return h.word(0x08)
}

// GetVersion returns the 16-bit value at offset 0x0A.
func (h *Header2MG) GetVersion() int {
	return h.word(0x0A)
}

// GetImageFormat returns the 32-bit value at offset 0x0C.
func (h *Header2MG) GetImageFormat() int {
	return h.long(0x0C)
}

// GetDOSFlags returns the 32-bit value at offset 0x10.
func (h *Header2MG) GetDOSFlags() int {
	return h.long(0x10)
}

// GetProDOSBlocks returns the 32-bit value at offset 0x14.
func (h *Header2MG) GetProDOSBlocks() int {
	return h.long(0x14)
}

// GetDiskDataStart returns the 32-bit value at offset 0x18.
func (h *Header2MG) GetDiskDataStart() int {
	return h.long(0x18)
}

// GetDiskDataLength returns the 32-bit value at offset 0x1C.
func (h *Header2MG) GetDiskDataLength() int {
	return h.long(0x1C)
}
h.SetData(dsk.Data[:0x40]) 65 | 66 | if h.GetID() != "2IMG" { 67 | return false, GetDiskFormat(DF_NONE), SectorOrderDOS33, nil 68 | } 69 | 70 | fmt.Println("Disk has 2MG Magic") 71 | fmt.Printf("Block count %d\n", h.GetProDOSBlocks()) 72 | 73 | start := h.GetDiskDataStart() 74 | size := h.GetDiskDataLength() 75 | 76 | if size < len(dsk.Data)-start { 77 | size = len(dsk.Data) - start 78 | } 79 | 80 | if size != STD_DISK_BYTES && size != PRODOS_800KB_DISK_BYTES && size != PRODOS_400KB_DISK_BYTES { 81 | fmt.Printf("Bad size %d bytes @ start %d\n", size, start) 82 | return false, GetDiskFormat(DF_NONE), SectorOrderDOS33, nil 83 | } 84 | 85 | data := dsk.Data[start : start+size] 86 | format := h.GetImageFormat() 87 | switch format { 88 | case 0x00: /* DOS sector order */ 89 | zdsk, _ := NewDSKWrapperBin(dsk.Nibbles, data, dsk.Filename) 90 | return true, GetDiskFormat(DF_DOS_SECTORS_16), SectorOrderDOS33, zdsk 91 | case 0x01: /* ProDOS sector order */ 92 | zdsk, _ := NewDSKWrapperBin(dsk.Nibbles, data, dsk.Filename) 93 | 94 | if h.GetProDOSBlocks() == 1600 { 95 | return true, GetDiskFormat(DF_PRODOS_800KB), SectorOrderProDOSLinear, zdsk 96 | } else if h.GetProDOSBlocks() == 800 { 97 | return true, GetDiskFormat(DF_PRODOS_400KB), SectorOrderProDOSLinear, zdsk 98 | } else { 99 | return true, GetPDDiskFormat(DF_PRODOS_CUSTOM, h.GetProDOSBlocks()), SectorOrderProDOSLinear, zdsk 100 | } 101 | 102 | } 103 | 104 | return false, GetDiskFormat(DF_NONE), SectorOrderDOS33, nil 105 | 106 | } 107 | -------------------------------------------------------------------------------- /drvrdos.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | 7 | "github.com/paleotronic/diskm8/disk" 8 | "github.com/paleotronic/diskm8/loggy" 9 | ) 10 | 11 | func analyzeRDOS(id int, dsk *disk.DSKWrapper, info *Disk) { 12 | 13 | l := loggy.Get(id) 14 | 15 | // Sector bitmap 16 | l.Logf("Reading Disk 
Structure...") 17 | 18 | info.Tracks, info.Sectors = 35, dsk.RDOSFormat.Spec().SectorMax 19 | 20 | l.Logf("Tracks: %d, Sectors: %d", info.Tracks, info.Sectors) 21 | 22 | l.Logf("Reading sector bitmap and SHA256'ing sectors") 23 | 24 | info.Bitmap = make([]bool, info.Tracks*info.Sectors) 25 | 26 | info.ActiveSectors = make(DiskSectors, 0) 27 | info.InactiveSectors = make(DiskSectors, 0) 28 | 29 | activeData := make([]byte, 0) 30 | 31 | var err error 32 | info.Bitmap, err = dsk.RDOSUsedBitmap() 33 | if err != nil { 34 | l.Errorf("Error reading bitmap: %s", err.Error()) 35 | return 36 | } 37 | 38 | for t := 0; t < info.Tracks; t++ { 39 | 40 | for s := 0; s < info.Sectors; s++ { 41 | 42 | if info.Bitmap[t*info.Sectors+s] { 43 | sector := &DiskSector{ 44 | Track: t, 45 | Sector: s, 46 | SHA256: dsk.ChecksumSector(t, s), 47 | } 48 | 49 | data := dsk.Read() 50 | activeData = append(activeData, data...) 51 | 52 | if *ingestMode&2 == 2 { 53 | sector.Data = data 54 | } 55 | 56 | info.ActiveSectors = append(info.ActiveSectors, sector) 57 | } else { 58 | 59 | sector := &DiskSector{ 60 | Track: t, 61 | Sector: s, 62 | SHA256: dsk.ChecksumSector(t, s), 63 | } 64 | 65 | data := dsk.Read() 66 | if *ingestMode&2 == 2 { 67 | sector.Data = data 68 | } 69 | //activeData = append(activeData, data...) 
70 | 71 | info.InactiveSectors = append(info.InactiveSectors, sector) 72 | 73 | } 74 | } 75 | 76 | } 77 | 78 | sum := sha256.Sum256(activeData) 79 | info.SHA256Active = hex.EncodeToString(sum[:]) 80 | 81 | info.LogBitmap(id) 82 | 83 | // Analyzing files 84 | l.Log("Starting Analysis of files") 85 | 86 | files, err := dsk.RDOSGetCatalog("*") 87 | if err != nil { 88 | l.Errorf("Problem reading directory: %s", err.Error()) 89 | return 90 | } 91 | 92 | info.Files = make([]*DiskFile, 0) 93 | for _, fd := range files { 94 | l.Logf("- Name=%s, Type=%s", fd.NameUnadorned(), fd.Type()) 95 | 96 | file := DiskFile{ 97 | Filename: fd.NameUnadorned(), 98 | Type: fd.Type().String(), 99 | Ext: fd.Type().Ext(), 100 | } 101 | 102 | //l.Log("start read") 103 | data, err := dsk.RDOSReadFile(fd) 104 | if err == nil { 105 | sum := sha256.Sum256(data) 106 | file.SHA256 = hex.EncodeToString(sum[:]) 107 | file.Size = len(data) 108 | if *ingestMode&1 == 1 { 109 | if fd.Type() == disk.FileType_RDOS_AppleSoft { 110 | file.Text = disk.ApplesoftDetoks(data) 111 | file.TypeCode = TypeMask_RDOS | TypeCode(fd.Type()) 112 | file.Data = data 113 | } else if fd.Type() == disk.FileType_RDOS_Text { 114 | file.Text = disk.StripText(data) 115 | file.TypeCode = TypeMask_RDOS | TypeCode(fd.Type()) 116 | file.Data = data 117 | } else { 118 | file.Data = data 119 | file.LoadAddress = fd.LoadAddress() 120 | file.TypeCode = TypeMask_RDOS | TypeCode(fd.Type()) 121 | } 122 | } 123 | } 124 | //l.Log("end read") 125 | 126 | l.Logf("FILETEXT=\n%s", dump(data)) 127 | 128 | info.Files = append(info.Files, &file) 129 | 130 | } 131 | 132 | exists := exists(*baseName + "/" + info.GetFilename()) 133 | 134 | if !exists || *forceIngest { 135 | info.WriteToFile(*baseName + "/" + info.GetFilename()) 136 | } else { 137 | l.Log("Not writing as it already exists") 138 | } 139 | 140 | out(dsk.Format) 141 | 142 | } 143 | -------------------------------------------------------------------------------- /drvpascal.go: 
// dump renders in as a hex listing: 16 bytes per row, each row prefixed with
// its four-digit hex offset and a colon, every byte printed as two hex digits
// followed by a space. Rows are separated by newlines and the result always
// ends with a newline, so empty input yields just "\n".
func dump(in []byte) string {
	const rowLen = 16

	text := ""
	for offset := 0; offset < len(in); offset += rowLen {
		if offset > 0 {
			text += "\n"
		}
		text += fmt.Sprintf("%.4x: ", offset)

		end := offset + rowLen
		if end > len(in) {
			end = len(in)
		}
		for _, b := range in[offset:end] {
			text += fmt.Sprintf("%.2x ", b)
		}
	}

	return text + "\n"
}
82 | 83 | } else { 84 | 85 | data, _ := dsk.PRODOSGetBlock(b) 86 | 87 | t, s1, s2 := dsk.PRODOSGetBlockSectors(b) 88 | 89 | sec1 := &DiskSector{ 90 | Track: t, 91 | Sector: s1, 92 | SHA256: dsk.ChecksumSector(t, s1), 93 | } 94 | 95 | sec2 := &DiskSector{ 96 | Track: t, 97 | Sector: s2, 98 | SHA256: dsk.ChecksumSector(t, s2), 99 | } 100 | 101 | if *ingestMode&2 == 2 { 102 | sec1.Data = data[:256] 103 | sec2.Data = data[256:] 104 | } 105 | 106 | info.InactiveSectors = append(info.InactiveSectors, sec1, sec2) 107 | 108 | //activeData = append(activeData, data...) 109 | 110 | } 111 | 112 | } 113 | 114 | sum := sha256.Sum256(activeData) 115 | info.SHA256Active = hex.EncodeToString(sum[:]) 116 | 117 | info.LogBitmap(id) 118 | 119 | // Analyzing files 120 | l.Log("Starting Analysis of files") 121 | 122 | files, err := dsk.PascalGetCatalog("*") 123 | if err != nil { 124 | l.Errorf("Problem reading directory: %s", err.Error()) 125 | return 126 | } 127 | 128 | info.Files = make([]*DiskFile, 0) 129 | for _, fd := range files { 130 | l.Logf("- Name=%s, Type=%d, Len=%d", fd.GetName(), fd.GetType(), fd.GetFileSize()) 131 | 132 | file := DiskFile{ 133 | Filename: fd.GetName(), 134 | Type: fd.GetType().String(), 135 | Locked: fd.IsLocked(), 136 | Ext: fd.GetType().Ext(), 137 | Created: time.Now(), 138 | Modified: time.Now(), 139 | } 140 | 141 | //l.Log("start read") 142 | data, err := dsk.PascalReadFile(fd) 143 | if err == nil { 144 | sum := sha256.Sum256(data) 145 | file.SHA256 = hex.EncodeToString(sum[:]) 146 | file.Size = len(data) 147 | if *ingestMode&1 == 1 { 148 | // text ingestion 149 | if fd.GetType() == disk.FileType_PAS_TEXT { 150 | file.Text = disk.StripText(data) 151 | file.Data = data 152 | file.TypeCode = TypeMask_Pascal | TypeCode(fd.GetType()) 153 | } else { 154 | file.Data = data 155 | file.TypeCode = TypeMask_Pascal | TypeCode(fd.GetType()) 156 | } 157 | } 158 | } 159 | 160 | //l.Log("end read") 161 | 162 | info.Files = append(info.Files, &file) 163 | 164 | } 165 
| 166 | exists := exists(*baseName + "/" + info.GetFilename()) 167 | 168 | if !exists || *forceIngest { 169 | info.WriteToFile(*baseName + "/" + info.GetFilename()) 170 | } else { 171 | l.Log("Not writing as it already exists") 172 | } 173 | 174 | out(dsk.Format) 175 | 176 | } 177 | -------------------------------------------------------------------------------- /drvappledos13.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | "time" 7 | 8 | "github.com/paleotronic/diskm8/disk" 9 | "github.com/paleotronic/diskm8/loggy" 10 | ) 11 | 12 | func analyzeDOS13(id int, dsk *disk.DSKWrapper, info *Disk) { 13 | 14 | l := loggy.Get(id) 15 | 16 | // Sector bitmap 17 | l.Logf("Reading Disk VTOC...") 18 | vtoc, err := dsk.AppleDOSGetVTOC() 19 | if err != nil { 20 | l.Errorf("Error reading VTOC: %s", err.Error()) 21 | return 22 | } 23 | 24 | info.Tracks, info.Sectors = vtoc.GetTracks(), vtoc.GetSectors() 25 | l.Logf("Tracks: %d, Sectors: %d", info.Tracks, info.Sectors) 26 | 27 | l.Logf("Reading sector bitmap and SHA256'ing sectors") 28 | 29 | info.Bitmap = make([]bool, info.Tracks*info.Sectors) 30 | 31 | info.ActiveSectors = make(DiskSectors, 0) 32 | info.InactiveSectors = make(DiskSectors, 0) 33 | 34 | activeData := make([]byte, 0) 35 | 36 | for t := 0; t < info.Tracks; t++ { 37 | 38 | for s := 0; s < info.Sectors; s++ { 39 | info.Bitmap[t*info.Sectors+s] = !vtoc.IsTSFree(t, s) 40 | 41 | // checksum sector 42 | //info.SectorFingerprints[dsk.ChecksumSector(t, s)] = &DiskBlock{Track: t, Sector: s} 43 | 44 | if info.Bitmap[t*info.Sectors+s] { 45 | sector := &DiskSector{ 46 | Track: t, 47 | Sector: s, 48 | SHA256: dsk.ChecksumSector(t, s), 49 | } 50 | 51 | data := dsk.Read() 52 | activeData = append(activeData, data...) 
53 | 54 | if *ingestMode&2 == 2 { 55 | sector.Data = data 56 | } 57 | 58 | info.ActiveSectors = append(info.ActiveSectors, sector) 59 | } else { 60 | sector := &DiskSector{ 61 | Track: t, 62 | Sector: s, 63 | SHA256: dsk.ChecksumSector(t, s), 64 | } 65 | 66 | data := dsk.Read() 67 | if *ingestMode&2 == 2 { 68 | sector.Data = data 69 | } 70 | //activeData = append(activeData, data...) 71 | 72 | info.InactiveSectors = append(info.InactiveSectors, sector) 73 | } 74 | } 75 | 76 | } 77 | 78 | sum := sha256.Sum256(activeData) 79 | info.SHA256Active = hex.EncodeToString(sum[:]) 80 | 81 | info.LogBitmap(id) 82 | 83 | // Analyzing files 84 | l.Log("Starting Analysis of files") 85 | 86 | vtoc, files, err := dsk.AppleDOSGetCatalog("*") 87 | if err != nil { 88 | l.Errorf("Problem reading directory: %s", err.Error()) 89 | return 90 | } 91 | 92 | info.Files = make([]*DiskFile, 0) 93 | for _, fd := range files { 94 | l.Logf("- Name=%s, Type=%s", fd.NameUnadorned(), fd.Type()) 95 | 96 | file := DiskFile{ 97 | Filename: fd.NameUnadorned(), 98 | Type: fd.Type().String(), 99 | Locked: fd.IsLocked(), 100 | Ext: fd.Type().Ext(), 101 | Created: time.Now(), 102 | Modified: time.Now(), 103 | } 104 | 105 | size, addr, data, err := dsk.AppleDOSReadFileRaw(fd) 106 | if err == nil { 107 | sum := sha256.Sum256(data) 108 | file.SHA256 = hex.EncodeToString(sum[:]) 109 | file.Size = size 110 | if *ingestMode&1 == 1 { 111 | if fd.Type() == disk.FileTypeAPP { 112 | file.Text = disk.ApplesoftDetoks(data) 113 | file.TypeCode = TypeMask_AppleDOS | TypeCode(fd.Type()) 114 | file.Data = data 115 | file.LoadAddress = 0x801 116 | } else if fd.Type() == disk.FileTypeINT { 117 | file.Text = disk.IntegerDetoks(data) 118 | file.Data = data 119 | file.TypeCode = TypeMask_AppleDOS | TypeCode(fd.Type()) 120 | file.LoadAddress = 0x1000 121 | } else if fd.Type() == disk.FileTypeTXT { 122 | file.Text = disk.StripText(data) 123 | file.Data = data 124 | file.TypeCode = TypeMask_AppleDOS | TypeCode(fd.Type()) 125 | 
file.LoadAddress = 0x0000 126 | } else if fd.Type() == disk.FileTypeBIN && len(data) >= 2 { 127 | file.LoadAddress = addr 128 | file.Data = data 129 | file.TypeCode = TypeMask_AppleDOS | TypeCode(fd.Type()) 130 | } else { 131 | file.LoadAddress = 0x0000 132 | file.Data = data 133 | file.TypeCode = TypeMask_AppleDOS | TypeCode(fd.Type()) 134 | } 135 | } 136 | } 137 | 138 | info.Files = append(info.Files, &file) 139 | 140 | } 141 | 142 | exists := exists(*baseName + "/" + info.GetFilename()) 143 | 144 | if !exists || *forceIngest { 145 | info.WriteToFile(*baseName + "/" + info.GetFilename()) 146 | } else { 147 | l.Log("Not writing as it already exists") 148 | } 149 | 150 | out(dsk.Format) 151 | } 152 | -------------------------------------------------------------------------------- /drvappledos16.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | "time" 7 | 8 | "github.com/paleotronic/diskm8/disk" 9 | "github.com/paleotronic/diskm8/loggy" 10 | ) 11 | 12 | func analyzeDOS16(id int, dsk *disk.DSKWrapper, info *Disk) { 13 | 14 | l := loggy.Get(id) 15 | 16 | // Sector bitmap 17 | l.Logf("Reading Disk VTOC...") 18 | vtoc, err := dsk.AppleDOSGetVTOC() 19 | if err != nil { 20 | l.Errorf("Error reading VTOC: %s", err.Error()) 21 | return 22 | } 23 | 24 | info.Tracks, info.Sectors = vtoc.GetTracks(), vtoc.GetSectors() 25 | 26 | if vtoc.BytesPerSector() != 256 { 27 | l.Errorf("Disk does not seem to be AppleDOS - treat as generic") 28 | dsk.Format = disk.GetDiskFormat(disk.DF_NONE) 29 | analyzeNONE(id, dsk, info) 30 | return 31 | } 32 | 33 | l.Logf("Tracks: %d, Sectors: %d", info.Tracks, info.Sectors) 34 | l.Logf("Sector order: %d", vtoc.GetTrackOrder()) 35 | 36 | l.Logf("Reading sector bitmap and SHA256'ing sectors") 37 | 38 | info.Bitmap = make([]bool, info.Tracks*info.Sectors) 39 | 40 | var useAlt bool 41 | 42 | if vtoc.IsTSFree(17, 0) { 43 | info.Bitmap, _ = 
dsk.AppleDOSUsedBitmap() 44 | useAlt = true 45 | } 46 | 47 | info.ActiveSectors = make(DiskSectors, 0) 48 | info.InactiveSectors = make(DiskSectors, 0) 49 | 50 | activeData := make([]byte, 0) 51 | 52 | for t := 0; t < info.Tracks; t++ { 53 | 54 | for s := 0; s < info.Sectors; s++ { 55 | 56 | if !useAlt { 57 | info.Bitmap[t*info.Sectors+s] = !vtoc.IsTSFree(t, s) 58 | } 59 | 60 | // checksum sector 61 | //info.SectorFingerprints[dsk.ChecksumSector(t, s)] = &DiskBlock{Track: t, Sector: s} 62 | 63 | if info.Bitmap[t*info.Sectors+s] { 64 | sector := &DiskSector{ 65 | Track: t, 66 | Sector: s, 67 | SHA256: dsk.ChecksumSector(t, s), 68 | } 69 | 70 | data := dsk.Read() 71 | activeData = append(activeData, data...) 72 | 73 | if *ingestMode&2 == 2 { 74 | sector.Data = data 75 | } 76 | 77 | info.ActiveSectors = append(info.ActiveSectors, sector) 78 | } else { 79 | sector := &DiskSector{ 80 | Track: t, 81 | Sector: s, 82 | SHA256: dsk.ChecksumSector(t, s), 83 | } 84 | 85 | data := dsk.Read() 86 | if *ingestMode&2 == 2 { 87 | sector.Data = data 88 | } 89 | //activeData = append(activeData, data...) 
90 | 91 | info.InactiveSectors = append(info.InactiveSectors, sector) 92 | } 93 | } 94 | 95 | } 96 | 97 | sum := sha256.Sum256(activeData) 98 | info.SHA256Active = hex.EncodeToString(sum[:]) 99 | 100 | info.LogBitmap(id) 101 | 102 | // Analyzing files 103 | l.Log("Starting Analysis of files") 104 | 105 | // lines := []string{ 106 | // "10 PRINT \"HELLO WORLDS\"", 107 | // "20 GOTO 10", 108 | // } 109 | 110 | // e := dsk.AppleDOSWriteFile("CHEESE", disk.FileTypeAPP, disk.ApplesoftTokenize(lines), 0x801) 111 | // if e != nil { 112 | // l.Errorf("Error writing file: %s", e.Error()) 113 | // panic(e) 114 | // } 115 | // f, _ := os.Create("out.dsk") 116 | // f.Write(dsk.Data) 117 | // f.Close() 118 | 119 | vtoc, files, err := dsk.AppleDOSGetCatalog("*") 120 | if err != nil { 121 | l.Errorf("Problem reading directory: %s", err.Error()) 122 | return 123 | } 124 | 125 | info.Files = make([]*DiskFile, 0) 126 | for _, fd := range files { 127 | l.Logf("- Name=%s, Type=%s", fd.NameUnadorned(), fd.Type()) 128 | 129 | file := DiskFile{ 130 | Filename: fd.NameUnadorned(), 131 | Type: fd.Type().String(), 132 | Locked: fd.IsLocked(), 133 | Ext: fd.Type().Ext(), 134 | Created: time.Now(), 135 | Modified: time.Now(), 136 | } 137 | 138 | //l.Log("start read") 139 | size, addr, data, err := dsk.AppleDOSReadFileRaw(fd) 140 | if err == nil { 141 | sum := sha256.Sum256(data) 142 | file.SHA256 = hex.EncodeToString(sum[:]) 143 | file.Size = size 144 | if *ingestMode&1 == 1 { 145 | if fd.Type() == disk.FileTypeAPP { 146 | file.Text = disk.ApplesoftDetoks(data) 147 | file.TypeCode = TypeMask_AppleDOS | TypeCode(fd.Type()) 148 | file.Data = data 149 | file.LoadAddress = 0x801 150 | } else if fd.Type() == disk.FileTypeINT { 151 | file.Text = disk.IntegerDetoks(data) 152 | file.TypeCode = TypeMask_AppleDOS | TypeCode(fd.Type()) 153 | file.LoadAddress = 0x1000 154 | file.Data = data 155 | } else if fd.Type() == disk.FileTypeTXT { 156 | file.Text = disk.StripText(data) 157 | file.Data = data 158 | 
file.TypeCode = TypeMask_AppleDOS | TypeCode(fd.Type()) 159 | file.LoadAddress = 0x0000 160 | } else if fd.Type() == disk.FileTypeBIN && len(data) >= 2 { 161 | file.LoadAddress = addr 162 | file.Data = data 163 | file.TypeCode = TypeMask_AppleDOS | TypeCode(fd.Type()) 164 | } else { 165 | file.LoadAddress = 0x0000 166 | file.Data = data 167 | file.TypeCode = TypeMask_AppleDOS | TypeCode(fd.Type()) 168 | } 169 | } 170 | } 171 | //l.Log("end read") 172 | 173 | info.Files = append(info.Files, &file) 174 | 175 | } 176 | 177 | exists := exists(*baseName + "/" + info.GetFilename()) 178 | 179 | if !exists || *forceIngest { 180 | info.WriteToFile(*baseName + "/" + info.GetFilename()) 181 | } else { 182 | l.Log("Not writing as it already exists") 183 | } 184 | 185 | out(dsk.Format) 186 | } 187 | -------------------------------------------------------------------------------- /search.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "os" 7 | "path/filepath" 8 | "strings" 9 | ) 10 | 11 | type SearchResultContext int 12 | 13 | const ( 14 | SRC_UNKNOWN SearchResultContext = iota 15 | SRC_FILE 16 | SRC_DISK 17 | ) 18 | 19 | type SearchResultItem struct { 20 | DiskPath string 21 | File *DiskFile 22 | } 23 | 24 | func searchForFilename(filename string, filter []string) { 25 | 26 | fd := GetAllFiles("*_*_*_*.fgp", filter) 27 | 28 | fmt.Printf("Filter: %s\n", filter) 29 | 30 | fmt.Println() 31 | fmt.Println() 32 | 33 | fmt.Printf("SEARCH RESULTS FOR '%s'\n", filename) 34 | 35 | fmt.Println() 36 | 37 | for diskname, list := range fd { 38 | //fmt.Printf("Checking: %s\n", diskname) 39 | for _, f := range list { 40 | if strings.Contains(strings.ToLower(f.Filename), strings.ToLower(filename)) { 41 | fmt.Printf("%32s:\n %s (%s, %d bytes, sha: %s)\n\n", diskname, f.Filename, f.Type, f.Size, f.SHA256) 42 | if *extract == "@" { 43 | ExtractFile(diskname, f, *adornedCP, false) 44 | } else if 
*extract == "#" { 45 | ExtractDisk(diskname) 46 | } 47 | } 48 | } 49 | } 50 | 51 | } 52 | 53 | func searchForSHA256(sha string, filter []string) { 54 | 55 | fd := GetAllFiles("*_*_*_*.fgp", filter) 56 | 57 | fmt.Println() 58 | fmt.Println() 59 | 60 | fmt.Printf("SEARCH RESULTS FOR SHA256 '%s'\n", sha) 61 | 62 | fmt.Println() 63 | 64 | for diskname, list := range fd { 65 | for _, f := range list { 66 | if f.SHA256 == sha { 67 | fmt.Printf("%32s:\n %s (%s, %d bytes, sha: %s)\n\n", diskname, f.Filename, f.Type, f.Size, f.SHA256) 68 | if *extract == "@" { 69 | ExtractFile(diskname, f, *adornedCP, false) 70 | } else if *extract == "#" { 71 | ExtractDisk(diskname) 72 | } 73 | } 74 | } 75 | } 76 | 77 | } 78 | 79 | func searchForTEXT(text string, filter []string) { 80 | 81 | fd := GetAllFiles("*_*_*_*.fgp", filter) 82 | 83 | fmt.Println() 84 | fmt.Println() 85 | 86 | fmt.Printf("SEARCH RESULTS FOR TEXT CONTENT '%s'\n", text) 87 | 88 | fmt.Println() 89 | 90 | for diskname, list := range fd { 91 | for _, f := range list { 92 | if strings.Contains(strings.ToLower(string(f.Text)), strings.ToLower(text)) { 93 | fmt.Printf("%32s:\n %s (%s, %d bytes, sha: %s)\n\n", diskname, f.Filename, f.Type, f.Size, f.SHA256) 94 | if *extract == "@" { 95 | ExtractFile(diskname, f, *adornedCP, false) 96 | } else if *extract == "#" { 97 | ExtractDisk(diskname) 98 | } 99 | } 100 | } 101 | } 102 | 103 | } 104 | 105 | func directory(filter []string, format string) { 106 | 107 | fd := GetAllFiles("*_*_*_*.fgp", filter) 108 | 109 | fmt.Println() 110 | fmt.Println() 111 | 112 | fmt.Println() 113 | 114 | for diskname, list := range fd { 115 | fmt.Printf("CATALOG RESULTS FOR '%s'\n", diskname) 116 | //fmt.Printf("Checking: %s\n", diskname) 117 | out := "" 118 | for _, file := range list { 119 | tmp := format 120 | // size 121 | tmp = strings.Replace(tmp, "{size:blocks}", fmt.Sprintf("%3d Blocks", file.Size/256+1), -1) 122 | tmp = strings.Replace(tmp, "{size:kb}", fmt.Sprintf("%4d Kb", file.Size/1024+1), 
-1) 123 | tmp = strings.Replace(tmp, "{size:b}", fmt.Sprintf("%6d Bytes", file.Size), -1) 124 | tmp = strings.Replace(tmp, "{size}", fmt.Sprintf("%6d", file.Size), -1) 125 | // format 126 | tmp = strings.Replace(tmp, "{filename}", fmt.Sprintf("%-36s", file.Filename), -1) 127 | // type 128 | tmp = strings.Replace(tmp, "{type}", fmt.Sprintf("%-20s", file.Type), -1) 129 | // sha256 130 | tmp = strings.Replace(tmp, "{sha256}", file.SHA256, -1) 131 | 132 | out += tmp + "\n" 133 | 134 | if *extract == "@" { 135 | ExtractFile(diskname, file, *adornedCP, false) 136 | } else if *extract == "#" { 137 | ExtractDisk(diskname) 138 | } 139 | } 140 | fmt.Println(out + "\n\n") 141 | } 142 | 143 | } 144 | 145 | var fileExtractCounter int 146 | 147 | func ExtractFile(diskname string, fd *DiskFile, adorned bool, local bool) error { 148 | 149 | var name string 150 | 151 | if adorned { 152 | name = fd.GetNameAdorned() 153 | } else { 154 | name = fd.GetName() 155 | } 156 | 157 | path := binpath() + "/extract" + diskname 158 | 159 | if local { 160 | ext := filepath.Ext(diskname) 161 | base := strings.Replace(filepath.Base(diskname), ext, "", -1) 162 | path = "./" + base 163 | } 164 | 165 | if path != "." 
{ 166 | os.MkdirAll(path, 0755) 167 | } 168 | 169 | //fmt.Printf("FD.EXT=%s\n", fd.Ext) 170 | 171 | f, err := os.Create(path + "/" + name) 172 | if err != nil { 173 | return err 174 | } 175 | defer f.Close() 176 | f.Write(fd.Data) 177 | os.Stderr.WriteString("Extracted file to " + path + "/" + name + "\n") 178 | 179 | if strings.ToLower(fd.Ext) == "int" || strings.ToLower(fd.Ext) == "bas" || strings.ToLower(fd.Ext) == "txt" { 180 | f, err := os.Create(path + "/" + name + ".ASC") 181 | if err != nil { 182 | return err 183 | } 184 | defer f.Close() 185 | f.Write(fd.Text) 186 | os.Stderr.WriteString("Extracted file to " + path + "/" + name + ".ASC\n") 187 | } 188 | 189 | //os.Stderr.WriteString("Extracted file to " + path + "/" + name) 190 | 191 | fileExtractCounter++ 192 | 193 | return nil 194 | 195 | } 196 | 197 | func ExtractDisk(diskname string) error { 198 | path := binpath() + "/extract" + diskname 199 | os.MkdirAll(path, 0755) 200 | data, err := ioutil.ReadFile(diskname) 201 | if err != nil { 202 | return err 203 | } 204 | target := path + "/" + filepath.Base(diskname) 205 | return ioutil.WriteFile(target, data, 0755) 206 | } 207 | -------------------------------------------------------------------------------- /drvprodos16.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | 7 | "github.com/paleotronic/diskm8/disk" 8 | "github.com/paleotronic/diskm8/loggy" 9 | ) 10 | 11 | func analyzePRODOS16(id int, dsk *disk.DSKWrapper, info *Disk) { 12 | 13 | l := loggy.Get(id) 14 | 15 | isPD, Format, Layout := dsk.IsProDOS() 16 | l.Logf("IsProDOS=%v, Format=%s, Layout=%d", isPD, Format, Layout) 17 | if isPD { 18 | dsk.Layout = Layout 19 | } 20 | 21 | // Sector bitmap 22 | l.Logf("Reading Disk VTOC...") 23 | vtoc, err := dsk.PRODOSGetVDH(2) 24 | if err != nil { 25 | l.Errorf("Error reading VTOC: %s", err.Error()) 26 | return 27 | } 28 | 29 | info.Blocks = 
vtoc.GetTotalBlocks() 30 | 31 | l.Logf("Filecount: %d", vtoc.GetFileCount()) 32 | 33 | l.Logf("Blocks: %d", info.Blocks) 34 | 35 | l.Logf("Reading sector bitmap and SHA256'ing sectors") 36 | 37 | info.Bitmap = make([]bool, info.Blocks) 38 | 39 | info.ActiveSectors = make(DiskSectors, 0) 40 | 41 | activeData := make([]byte, 0) 42 | 43 | vbitmap, err := dsk.PRODOSGetVolumeBitmap() 44 | if err != nil { 45 | l.Errorf("Error reading volume bitmap: %s", err.Error()) 46 | return 47 | } 48 | 49 | l.Debug(vbitmap) 50 | 51 | for b := 0; b < info.Blocks; b++ { 52 | info.Bitmap[b] = !vbitmap.IsBlockFree(b) 53 | 54 | if info.Bitmap[b] { 55 | 56 | data, _ := dsk.PRODOSGetBlock(b) 57 | 58 | t, s1, s2 := dsk.PRODOSGetBlockSectors(b) 59 | 60 | sec1 := &DiskSector{ 61 | Track: t, 62 | Sector: s1, 63 | SHA256: dsk.ChecksumSector(t, s1), 64 | } 65 | 66 | sec2 := &DiskSector{ 67 | Track: t, 68 | Sector: s2, 69 | SHA256: dsk.ChecksumSector(t, s2), 70 | } 71 | 72 | if *ingestMode&2 == 2 { 73 | sec1.Data = data[:256] 74 | sec2.Data = data[256:] 75 | } 76 | 77 | info.ActiveSectors = append(info.ActiveSectors, sec1, sec2) 78 | 79 | activeData = append(activeData, data...) 80 | 81 | } else { 82 | 83 | data, _ := dsk.PRODOSGetBlock(b) 84 | 85 | t, s1, s2 := dsk.PRODOSGetBlockSectors(b) 86 | 87 | sec1 := &DiskSector{ 88 | Track: t, 89 | Sector: s1, 90 | SHA256: dsk.ChecksumSector(t, s1), 91 | } 92 | 93 | sec2 := &DiskSector{ 94 | Track: t, 95 | Sector: s2, 96 | SHA256: dsk.ChecksumSector(t, s2), 97 | } 98 | 99 | if *ingestMode&2 == 2 { 100 | sec1.Data = data[:256] 101 | sec2.Data = data[256:] 102 | } 103 | 104 | info.InactiveSectors = append(info.InactiveSectors, sec1, sec2) 105 | 106 | //activeData = append(activeData, data...) 
107 | 108 | } 109 | 110 | } 111 | 112 | sum := sha256.Sum256(activeData) 113 | info.SHA256Active = hex.EncodeToString(sum[:]) 114 | 115 | info.LogBitmap(id) 116 | 117 | // // Analyzing files 118 | l.Log("Starting Analysis of files") 119 | 120 | prodosDir(id, 2, "", dsk, info) 121 | 122 | exists := exists(*baseName + "/" + info.GetFilename()) 123 | 124 | if !exists || *forceIngest { 125 | e := info.WriteToFile(*baseName + "/" + info.GetFilename()) 126 | if e != nil { 127 | l.Errorf("Error writing fingerprint: %v", e) 128 | panic(e) 129 | } 130 | } else { 131 | l.Log("Not writing as it already exists") 132 | } 133 | 134 | out(dsk.Format) 135 | 136 | } 137 | 138 | func prodosDir(id int, start int, path string, dsk *disk.DSKWrapper, info *Disk) { 139 | 140 | l := loggy.Get(id) 141 | 142 | _, files, err := dsk.PRODOSGetCatalog(start, "*") 143 | if err != nil { 144 | l.Errorf("Problem reading directory: %s", err.Error()) 145 | return 146 | } 147 | if info.Files == nil { 148 | info.Files = make([]*DiskFile, 0) 149 | } 150 | for _, fd := range files { 151 | l.Logf("- Path=%s, Name=%s, Type=%s", path, fd.NameUnadorned(), fd.Type()) 152 | 153 | var file DiskFile 154 | 155 | if path == "" { 156 | 157 | file = DiskFile{ 158 | Filename: fd.NameUnadorned(), 159 | Type: fd.Type().String(), 160 | Locked: fd.IsLocked(), 161 | Ext: fd.Type().Ext(), 162 | Created: fd.CreateTime(), 163 | Modified: fd.ModTime(), 164 | } 165 | 166 | } else { 167 | 168 | file = DiskFile{ 169 | Filename: path + "/" + fd.NameUnadorned(), 170 | Type: fd.Type().String(), 171 | Locked: fd.IsLocked(), 172 | Ext: fd.Type().Ext(), 173 | Created: fd.CreateTime(), 174 | Modified: fd.ModTime(), 175 | } 176 | 177 | } 178 | 179 | if fd.Type() != disk.FileType_PD_Directory { 180 | _, _, data, err := dsk.PRODOSReadFileRaw(fd) 181 | if err == nil { 182 | sum := sha256.Sum256(data) 183 | file.SHA256 = hex.EncodeToString(sum[:]) 184 | file.Size = len(data) 185 | if *ingestMode&1 == 1 { 186 | if fd.Type() == 
disk.FileType_PD_APP { 187 | file.Text = disk.ApplesoftDetoks(data) 188 | file.TypeCode = TypeMask_ProDOS | TypeCode(fd.Type()) 189 | file.Data = data 190 | file.LoadAddress = fd.AuxType() 191 | } else if fd.Type() == disk.FileType_PD_INT { 192 | file.Text = disk.IntegerDetoks(data) 193 | file.TypeCode = TypeMask_ProDOS | TypeCode(fd.Type()) 194 | file.Data = data 195 | file.LoadAddress = fd.AuxType() 196 | } else if fd.Type() == disk.FileType_PD_TXT { 197 | file.Text = disk.StripText(data) 198 | file.Data = data 199 | file.TypeCode = TypeMask_ProDOS | TypeCode(fd.Type()) 200 | file.LoadAddress = fd.AuxType() 201 | } else { 202 | file.LoadAddress = fd.AuxType() 203 | file.Data = data 204 | file.TypeCode = TypeMask_ProDOS | TypeCode(fd.Type()) 205 | } 206 | } 207 | } 208 | } 209 | 210 | info.Files = append(info.Files, &file) 211 | 212 | if fd.Type() == disk.FileType_PD_Directory { 213 | newpath := path 214 | if path != "" { 215 | newpath += "/" + fd.NameUnadorned() 216 | } else { 217 | newpath = fd.NameUnadorned() 218 | } 219 | prodosDir(id, fd.IndexBlock(), newpath, dsk, info) 220 | } 221 | 222 | } 223 | 224 | } 225 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | DiskM8 is a cross-platform command-line tool for manipulating and managing Apple II DSK (and other) images. 
2 | 3 | Download from: https://github.com/paleotronic/diskm8/releases 4 | 5 | Features include: 6 | 7 | - Read from ProDOS, DOS 3.X, RDOS and Pascal disk images; 8 | - ProDOS or DOS ordered; DSK, PO, 2MG and NIB; 113-800K 9 | - Write to ProDOS and DOS 3.3 disk images; 10 | - Extract and convert binary and text files, and detokenize BASIC files (Integer and Applesoft); 11 | - Write binary, text and retokenized BASIC (Applesoft) files back to disk images; 12 | - Copy and move files between disk images; delete files, create new folders (ProDOS), etc; 13 | - Generate disk reports that provide track and sector information, text extraction and more; 14 | - Compare multiple disks to determine duplication, or search disks for text or filenames. 15 | - Use command-line flags (allows for automation) or an interactive shell; 16 | - Builds for macOS, Windows, Linux, FreeBSD and Raspberry Pi. 17 | - Open source; GPLv3 licensed. 18 | - Written in Go! 19 | 20 | DiskM8 is a command line tool for analyzing and managing Apple II DSK images and their archives. Its features include not only the standard set of disk manipulation tools -- extract (with text conversion), import to disk (including tokenisation of Applesoft BASIC), delete, and so forth -- but also the ability to identify duplicates — complete, active sector, and subset; find file, sector and other commonalities between disks (including as a percentage of similarity or difference); search de-tokenized BASIC, text and binary / sector data; generate reports identifying and / or collating disk type, DOS, geometry, size, and so forth; allowing for easier, semi-automated DSK archival management and research. 21 | 22 | DiskM8 works by first “ingesting” your disk(s), creating an index containing various pieces of information (disk / sector / file hashes, catalogs, text data, binary data etc.) about each disk that is then searchable using the same tool.
This way you can easily find information stored on disks without tediously searching manually or through time-consuming multiple image scans. You can also identify duplicates, quasi-duplicates (disks with only minor differences or extraneous data), or iterations, reducing redundancies. 23 | 24 | Once you've identified a search you can also extract selected files. DiskM8 can report to standard output (terminal), to a text file, or to a CSV file. 25 | ``` 26 | Shell commands (executing DiskM8 without flags enters shell): 27 | 28 | Note: You must mount a disk before performing tasks on it. 29 | 30 | analyze Process disk using diskm8 analytics 31 | cat Display file information 32 | cd Change local path 33 | copy Copy files from one volume to another 34 | delete Remove file from disk 35 | disks List mounted volumes 36 | extract extract file from disk image 37 | help Shows this help 38 | info Information about the current disk 39 | ingest Ingest directory containing disks (or single disk) into system 40 | lock Lock file on the disk 41 | ls List local files 42 | mkdir Create a directory on disk 43 | mount Mount a disk image 44 | move Move files from one volume to another 45 | prefix Change volume path 46 | put Copy local file to disk (with optional target dir) 47 | quarantine Like report, but allow moving dupes to a backup folder 48 | quit Leave this place 49 | rename Rename a file on the disk 50 | report Run a report 51 | search Run a search 52 | target Select mounted volume as default 53 | unlock Unlock file on the disk 54 | unmount unmount disk image 55 | 56 | Command-line flags: 57 | 58 | (Note: You must ingest your disk library before you can run comparison or search operations on it) 59 | 60 | -active-sector-partial 61 | Run partial sector match (active only) against all disks 62 | -active-sector-subset 63 | Run subset (active) sector match against all disks 64 | -adorned 65 | Extract files named similar to CP (default true) 66 | -all-file-partial 67 | Run partial 
file match against all disks 68 | -all-file-subset 69 | Run subset file match against all disks 70 | -all-sector-partial 71 | Run partial sector match (all) against all disks 72 | -all-sector-subset 73 | Run subset (non-zero) sector match against all disks 74 | -as-dupes 75 | Run active sectors only disk dupe report 76 | -as-partial 77 | Run partial active sector match against single disk (-disk required) 78 | -c Cache data to memory for quicker processing (default true) 79 | -cat-dupes 80 | Run duplicate catalog report 81 | -catalog 82 | List disk contents (-with-disk) 83 | -csv 84 | Output data to CSV format 85 | -datastore string 86 | Database of disk fingerprints for checking (default "/home/myname/DiskM8/fingerprints") 87 | -dir 88 | Directory specified disk (needs -disk) 89 | -dir-create string 90 | Directory to create (-with-disk) 91 | -dir-format string 92 | Format of dir (default "{filename} {type} {size:kb} Checksum: {sha256}") 93 | -extract string 94 | Extract files/disks matched in searches ('#'=extract disk, '@'=extract files) 95 | -file string 96 | Search for other disks containing file 97 | -file-delete string 98 | File to delete (-with-disk) 99 | -file-dupes 100 | Run file dupe report 101 | -file-extract string 102 | File to extract from disk (-with-disk) 103 | -file-partial 104 | Run partial file match against single disk (-disk required) 105 | -file-put string 106 | File to put on disk (-with-disk) 107 | -force 108 | Force re-ingest disks that already exist 109 | -ingest string 110 | Disk file or path to ingest 111 | -ingest-mode int 112 | Ingest mode: 113 | 0=Fingerprints only 114 | 1=Fingerprints + text 115 | 2=Fingerprints + sector data 116 | 3=All (default 1) 117 | -max-diff int 118 | Maximum different # files for -all-file-partial 119 | -min-same int 120 | Minimum same # files for -all-file-partial 121 | -out string 122 | Output file (empty for stdout) 123 | -quarantine 124 | Run -as-dupes and -whole-dupes in quarantine mode 125 | -query string
126 | Disk file to query or analyze 127 | -search-filename string 128 | Search database for file with name 129 | -search-sha string 130 | Search database for file with checksum 131 | -search-text string 132 | Search database for file containing text 133 | -select 134 | Select files for analysis or search based on file/dir/mask 135 | -shell 136 | Start interactive mode 137 | -shell-batch string 138 | Execute shell command(s) from file and exit 139 | -similarity float 140 | Object match threshold for -*-partial reports (default 0.9) 141 | -verbose 142 | Log to stderr 143 | -whole-dupes 144 | Run whole disk dupe report 145 | -with-disk string 146 | Perform disk operation (-file-extract,-file-put,-file-delete) 147 | -with-path string 148 | Target path for disk operation (-file-extract,-file-put,-file-delete) 149 | ``` 150 | 151 | ## Getting Started 152 | 153 | Ingest your disk collection, so diskm8 can report on them: 154 | 155 | ```diskm8 -ingest "C:\Users\myname\LotsOfDisks"``` 156 | 157 | ### Simple Reports 158 | 159 | Find Whole Disk duplicates: 160 | 161 | ```diskm8 -whole-dupes``` 162 | 163 | Find Active Sectors duplicates (inactive sectors can be different): 164 | 165 | ```diskm8 -as-dupes``` 166 | 167 | Find Duplicate files across disks: 168 | 169 | ```diskm8 -file-dupes``` 170 | 171 | ### Limiting reports to subdirectories 172 | 173 | Find Active Sector duplicates but only under a folder: 174 | 175 | ```diskm8 -as-dupes -select "C:\Users\myname\LotsOfDisks\Operating Systems"``` 176 | 177 | ### Putting a file onto a disk in a particular path 178 | 179 | ```diskm8 -with-disk prodos_basic.dsk -with-path practice -file-put start#0x0801.BAS``` 180 | -------------------------------------------------------------------------------- /disk/diskimagepas.go: -------------------------------------------------------------------------------- 1 | package disk 2 | 3 | import ( 4 | "errors" 5 | "regexp" 6 | "strings" 7 | ) 8 | 9 | const PASCAL_BLOCK_SIZE = 512 10 | const 
PASCAL_VOLUME_BLOCK = 2 11 | const PASCAL_MAX_VOLUME_NAME = 7 12 | const PASCAL_DIRECTORY_ENTRY_LENGTH = 26 13 | const PASCAL_OVERSIZE_DIR = 32 14 | 15 | func (dsk *DSKWrapper) IsPascal() (bool, string) { 16 | 17 | dsk.Format = GetDiskFormat(DF_PRODOS) 18 | 19 | data, err := dsk.PRODOSGetBlock(PASCAL_VOLUME_BLOCK) 20 | if err != nil { 21 | return false, "" 22 | } 23 | 24 | if !(data[0x00] == 0 && data[0x01] == 0) || 25 | !(data[0x04] == 0 && data[0x05] == 0) || 26 | !(data[0x06] > 0 && data[0x06] <= PASCAL_MAX_VOLUME_NAME) { 27 | return false, "" 28 | } 29 | 30 | l := int(data[0x06]) 31 | name := data[0x07 : 0x07+l] 32 | 33 | str := "" 34 | for _, ch := range name { 35 | if ch == 0x00 { 36 | break 37 | } 38 | if ch < 0x20 || ch >= 0x7f { 39 | return false, "" 40 | } 41 | 42 | if strings.Contains("$=?,[#:", string(ch)) { 43 | return false, "" 44 | } 45 | 46 | str += string(ch) 47 | } 48 | 49 | return true, str 50 | 51 | } 52 | 53 | type PascalVolumeHeader struct { 54 | data [PASCAL_DIRECTORY_ENTRY_LENGTH]byte 55 | } 56 | 57 | func (pvh *PascalVolumeHeader) SetData(data []byte) { 58 | for i, v := range data { 59 | if i < len(pvh.data) { 60 | pvh.data[i] = v 61 | } 62 | } 63 | } 64 | 65 | func (pvh *PascalVolumeHeader) GetStartBlock() int { 66 | return int(pvh.data[0x00]) + 256*int(pvh.data[0x01]) 67 | } 68 | 69 | func (pvh *PascalVolumeHeader) GetNextBlock() int { 70 | return int(pvh.data[0x02]) + 256*int(pvh.data[0x03]) 71 | } 72 | 73 | type PascalFileType int 74 | 75 | const ( 76 | FileType_PAS_NONE PascalFileType = 0 77 | FileType_PAS_BADD PascalFileType = 1 78 | FileType_PAS_CODE PascalFileType = 2 79 | FileType_PAS_TEXT PascalFileType = 3 80 | FileType_PAS_INFO PascalFileType = 4 81 | FileType_PAS_DATA PascalFileType = 5 82 | FileType_PAS_GRAF PascalFileType = 6 83 | FileType_PAS_FOTO PascalFileType = 7 84 | FileType_PAS_SECD PascalFileType = 8 85 | ) 86 | 87 | var PascalTypeMap = map[PascalFileType][2]string{ 88 | 0x00: [2]string{"UNK", "ASCII Text"}, 89 | 
// PascalFileType is the file kind code stored in a Pascal directory entry.
type PascalFileType int

const (
	FileType_PAS_NONE PascalFileType = iota
	FileType_PAS_BADD
	FileType_PAS_CODE
	FileType_PAS_TEXT
	FileType_PAS_INFO
	FileType_PAS_DATA
	FileType_PAS_GRAF
	FileType_PAS_FOTO
	FileType_PAS_SECD
)

// PascalTypeMap maps each known file type to its three-letter extension
// (index 0) and its human-readable description (index 1).
var PascalTypeMap = map[PascalFileType][2]string{
	FileType_PAS_NONE: {"UNK", "ASCII Text"},
	FileType_PAS_BADD: {"BAD", "Bad Block"},
	FileType_PAS_CODE: {"PCD", "Pascal Code"},
	FileType_PAS_TEXT: {"PTX", "Pascal Text"},
	FileType_PAS_INFO: {"PIF", "Pascal Info"},
	FileType_PAS_DATA: {"PDA", "Pascal Data"},
	FileType_PAS_GRAF: {"GRF", "Pascal Graphics"},
	FileType_PAS_FOTO: {"FOT", "HiRes Graphics"},
	FileType_PAS_SECD: {"SEC", "Secure Directory"},
}

// String returns the description of ft, or "Unknown" for a type that is not
// in PascalTypeMap.
func (ft PascalFileType) String() string {
	if entry, known := PascalTypeMap[ft]; known {
		return entry[1]
	}
	return "Unknown"
}

// Ext returns the three-letter extension of ft, or "UNK" for a type that is
// not in PascalTypeMap.
func (ft PascalFileType) Ext() string {
	if entry, known := PascalTypeMap[ft]; known {
		return entry[0]
	}
	return "UNK"
}

// PascalFileTypeFromExt returns the file type whose extension equals ext
// (compared in upper case), or FileType_PAS_NONE when none matches.
func PascalFileTypeFromExt(ext string) PascalFileType {
	wanted := strings.ToUpper(ext)
	for ft, entry := range PascalTypeMap {
		if entry[0] == wanted {
			return ft
		}
	}
	return FileType_PAS_NONE
}
+ PascalFileType(pvh.GetType()).Ext() 142 | return s 143 | } 144 | 145 | func (pvh *PascalVolumeHeader) GetTotalBlocks() int { 146 | return int(pvh.data[0x0e]) + 256*int(pvh.data[0x0f]) 147 | } 148 | 149 | func (pvh *PascalVolumeHeader) GetNumFiles() int { 150 | return int(pvh.data[0x10]) + 256*int(pvh.data[0x11]) 151 | } 152 | 153 | type PascalFileEntry struct { 154 | data [PASCAL_DIRECTORY_ENTRY_LENGTH]byte 155 | } 156 | 157 | func (pfe *PascalFileEntry) SetData(data []byte) { 158 | for i, v := range data { 159 | if i < len(pfe.data) { 160 | pfe.data[i] = v 161 | } 162 | } 163 | } 164 | 165 | func (pvh *PascalFileEntry) IsLocked() bool { 166 | return true 167 | } 168 | 169 | func (pvh *PascalFileEntry) GetStartBlock() int { 170 | return int(pvh.data[0x00]) + 256*int(pvh.data[0x01]) 171 | } 172 | 173 | func (pvh *PascalFileEntry) GetNextBlock() int { 174 | return int(pvh.data[0x02]) + 256*int(pvh.data[0x03]) 175 | } 176 | 177 | func (pvh *PascalFileEntry) GetType() PascalFileType { 178 | return PascalFileType(int(pvh.data[0x04]) + 256*int(pvh.data[0x05])) 179 | } 180 | 181 | func (pvh *PascalFileEntry) GetNameLength() int { 182 | return int(pvh.data[0x06]) & 0x0f 183 | } 184 | 185 | func (pvh *PascalFileEntry) GetName() string { 186 | l := pvh.GetNameLength() 187 | return strings.Trim(string(pvh.data[0x07:0x07+l]), "") 188 | } 189 | 190 | func (pvh *PascalFileEntry) GetBytesRemaining() int { 191 | return int(pvh.data[0x16]) + 256*int(pvh.data[0x17]) 192 | } 193 | 194 | func (pvh *PascalFileEntry) GetFileSize() int { 195 | return pvh.GetBytesRemaining() + (pvh.GetNextBlock()-pvh.GetStartBlock()-1)*PASCAL_BLOCK_SIZE 196 | } 197 | 198 | func (dsk *DSKWrapper) PascalGetCatalog(pattern string) ([]*PascalFileEntry, error) { 199 | 200 | pattern = strings.Replace(pattern, ".", "[.]", -1) 201 | pattern = strings.Replace(pattern, "*", ".*", -1) 202 | pattern = strings.Replace(pattern, "?", ".", -1) 203 | 204 | rx := regexp.MustCompile("(?i)" + pattern) 205 | 206 | files := 
make([]*PascalFileEntry, 0) 207 | 208 | // 209 | 210 | d, err := dsk.PRODOSGetBlock(PASCAL_VOLUME_BLOCK) 211 | if err != nil { 212 | return nil, err 213 | } 214 | 215 | pvh := &PascalVolumeHeader{} 216 | pvh.SetData(d) 217 | numBlocks := pvh.GetNextBlock() - PASCAL_VOLUME_BLOCK 218 | 219 | if numBlocks < 0 || numBlocks > PASCAL_OVERSIZE_DIR { 220 | return files, errors.New("Directory appears corrupt") 221 | } 222 | 223 | // disk catalog is okay 224 | catdata := make([]byte, 0) 225 | for block := PASCAL_VOLUME_BLOCK; block < PASCAL_VOLUME_BLOCK+numBlocks; block++ { 226 | data, err := dsk.PRODOSGetBlock(block) 227 | if err != nil { 228 | return files, err 229 | } 230 | catdata = append(catdata, data...) 231 | } 232 | 233 | dirPtr := PASCAL_DIRECTORY_ENTRY_LENGTH 234 | for i := 0; i < pvh.GetNumFiles(); i++ { 235 | b := catdata[dirPtr : dirPtr+PASCAL_DIRECTORY_ENTRY_LENGTH] 236 | fd := &PascalFileEntry{} 237 | fd.SetData(b) 238 | // add file 239 | 240 | if rx.MatchString(fd.GetName()) { 241 | 242 | files = append(files, fd) 243 | 244 | } 245 | 246 | // move 247 | dirPtr += PASCAL_DIRECTORY_ENTRY_LENGTH 248 | } 249 | 250 | return files, nil 251 | 252 | } 253 | 254 | func (dsk *DSKWrapper) PascalUsedBitmap() ([]bool, error) { 255 | 256 | activeBlocks := dsk.Format.BPD() 257 | 258 | used := make([]bool, activeBlocks) 259 | 260 | files, err := dsk.PascalGetCatalog("*") 261 | if err != nil { 262 | return used, err 263 | } 264 | 265 | for _, file := range files { 266 | 267 | length := file.GetNextBlock() - file.GetStartBlock() 268 | start := file.GetStartBlock() 269 | if start+length > activeBlocks { 270 | continue // file is bad 271 | } 272 | 273 | for block := start; block < start+length; block++ { 274 | used[block] = true 275 | } 276 | 277 | } 278 | 279 | return used, nil 280 | 281 | } 282 | 283 | func (dsk *DSKWrapper) PascalReadFile(file *PascalFileEntry) ([]byte, error) { 284 | 285 | activeSectors := dsk.Format.BPD() 286 | 287 | length := file.GetNextBlock() - 
file.GetStartBlock() 288 | start := file.GetStartBlock() 289 | 290 | // If file is damaged return nothing 291 | if start+length > activeSectors { 292 | return []byte(nil), nil 293 | } 294 | 295 | block := start 296 | data := make([]byte, 0) 297 | for block < start+length && len(data) < file.GetFileSize() { 298 | 299 | chunk, err := dsk.PRODOSGetBlock(block) 300 | if err != nil { 301 | return data, err 302 | } 303 | needed := file.GetFileSize() - len(data) 304 | if needed >= PASCAL_BLOCK_SIZE { 305 | data = append(data, chunk...) 306 | } else { 307 | data = append(data, chunk[:needed]...) 308 | } 309 | 310 | block++ 311 | 312 | } 313 | 314 | return data, nil 315 | 316 | } 317 | -------------------------------------------------------------------------------- /disk/diskimagerdos.go: -------------------------------------------------------------------------------- 1 | package disk 2 | 3 | import ( 4 | "bytes" 5 | "regexp" 6 | "strings" 7 | ) 8 | 9 | const RDOS_CATALOG_TRACK = 0x01 10 | const RDOS_CATALOG_LENGTH = 0xB 11 | const RDOS_ENTRY_LENGTH = 0x20 12 | const RDOS_NAME_LENGTH = 0x18 13 | 14 | var RDOS_SIGNATURE = []byte{ 15 | byte('R' + 0x80), 16 | byte('D' + 0x80), 17 | byte('O' + 0x80), 18 | byte('S' + 0x80), 19 | byte(' ' + 0x80), 20 | } 21 | 22 | var RDOS_SIGNATURE_32 = []byte{ 23 | byte('R' + 0x80), 24 | byte('D' + 0x80), 25 | byte('O' + 0x80), 26 | byte('S' + 0x80), 27 | byte(' ' + 0x80), 28 | byte('2' + 0x80), 29 | } 30 | 31 | var RDOS_SIGNATURE_33 = []byte{ 32 | byte('R' + 0x80), 33 | byte('D' + 0x80), 34 | byte('O' + 0x80), 35 | byte('S' + 0x80), 36 | byte(' ' + 0x80), 37 | byte('3' + 0x80), 38 | } 39 | 40 | type RDOSFormatSpec struct { 41 | SectorStride int 42 | SectorMax int 43 | CatalogTrack int 44 | CatalogSector int 45 | Ordering SectorOrder 46 | } 47 | 48 | type RDOSFormat int 49 | 50 | const ( 51 | RDOS_Unknown RDOSFormat = iota 52 | RDOS_3 53 | RDOS_32 54 | RDOS_33 55 | ) 56 | 57 | func (f RDOSFormat) String() string { 58 | 59 | switch f { 60 | case 
RDOS_3: 61 | return "RDOS3" 62 | case RDOS_32: 63 | return "RDOS32" 64 | case RDOS_33: 65 | return "RDOS33" 66 | } 67 | 68 | return "Unknown" 69 | 70 | } 71 | 72 | func (f RDOSFormat) Spec() *RDOSFormatSpec { 73 | 74 | switch f { 75 | case RDOS_32: 76 | return &RDOSFormatSpec{ 77 | SectorStride: 16, 78 | SectorMax: 13, 79 | CatalogTrack: 1, 80 | CatalogSector: 0, 81 | Ordering: SectorOrderDOS33, 82 | } 83 | case RDOS_3: 84 | return &RDOSFormatSpec{ 85 | SectorStride: 16, 86 | SectorMax: 13, 87 | CatalogTrack: 1, 88 | CatalogSector: 0, 89 | Ordering: SectorOrderDOS33, 90 | } 91 | case RDOS_33: 92 | return &RDOSFormatSpec{ 93 | SectorStride: 16, 94 | SectorMax: 16, 95 | CatalogTrack: 1, 96 | CatalogSector: 12, 97 | Ordering: SectorOrderProDOS, 98 | } 99 | } 100 | return nil 101 | 102 | } 103 | 104 | func (dsk *DSKWrapper) IsRDOS() (bool, RDOSFormat) { 105 | 106 | // It needs to be either 140K or 113K 107 | if len(dsk.Data) != STD_DISK_BYTES && len(dsk.Data) != STD_DISK_BYTES_OLD { 108 | return false, RDOS_Unknown 109 | } 110 | 111 | sectorStride := (len(dsk.Data) / STD_TRACKS_PER_DISK) / 256 112 | 113 | idbytes := dsk.Data[sectorStride*256 : sectorStride*256+6] 114 | 115 | if bytes.Compare(idbytes, RDOS_SIGNATURE_32) == 0 && sectorStride == 13 { 116 | return true, RDOS_32 117 | } 118 | 119 | if bytes.Compare(idbytes, RDOS_SIGNATURE_32) == 0 && sectorStride == 16 { 120 | return true, RDOS_3 121 | } 122 | 123 | if bytes.Compare(idbytes, RDOS_SIGNATURE_33) == 0 && sectorStride == 16 { 124 | return true, RDOS_33 125 | } 126 | 127 | return false, RDOS_Unknown 128 | 129 | } 130 | 131 | type RDOSFileDescriptor struct { 132 | data [RDOS_ENTRY_LENGTH]byte 133 | } 134 | 135 | func (fd *RDOSFileDescriptor) SetData(in []byte) { 136 | for i, b := range in { 137 | if i < RDOS_ENTRY_LENGTH { 138 | fd.data[i] = b 139 | } 140 | } 141 | } 142 | 143 | func (fd *RDOSFileDescriptor) IsDeleted() bool { 144 | 145 | return fd.data[24] == 0xa0 || fd.data[0] == 0x80 146 | 147 | } 148 | 149 | 
// RDOSFileType identifies the kind of file recorded in an RDOS catalog entry.
type RDOSFileType int

// Known RDOS file types. FileType_RDOS_Unknown is the zero value.
const (
	FileType_RDOS_Unknown RDOSFileType = iota
	FileType_RDOS_AppleSoft
	FileType_RDOS_Binary
	FileType_RDOS_Text
)

// RDOSTypeMap maps each file type to a pair of strings:
// index 0 is the three-letter extension, index 1 the human-readable name.
var RDOSTypeMap = map[RDOSFileType][2]string{
	FileType_RDOS_Unknown:   {"UNK", "Unknown"},
	FileType_RDOS_AppleSoft: {"BAS", "Applesoft Basic Program"},
	FileType_RDOS_Binary:    {"BIN", "Binary File"},
	FileType_RDOS_Text:      {"TXT", "ASCII Text"},
}

// String returns the human-readable name of the file type, or "Unknown"
// when the type has no entry in RDOSTypeMap.
func (ft RDOSFileType) String() string {
	if info, ok := RDOSTypeMap[ft]; ok {
		return info[1]
	}
	return "Unknown"
}

// Ext returns the upper-case three-letter extension of the file type, or
// "UNK" when the type has no entry in RDOSTypeMap.
func (ft RDOSFileType) Ext() string {
	if info, ok := RDOSTypeMap[ft]; ok {
		return info[0]
	}
	return "UNK"
}

// RDOSFileTypeFromExt returns the file type whose extension equals ext,
// compared case-insensitively. It returns FileType_RDOS_Unknown when no
// extension matches.
func RDOSFileTypeFromExt(ext string) RDOSFileType {
	// Upper-case once; the table stores extensions in upper case.
	want := strings.ToUpper(ext)
	for ft, info := range RDOSTypeMap {
		if info[0] == want {
			return ft
		}
	}
	return FileType_RDOS_Unknown
}
FileType_RDOS_Text: 235 | str += ".txt" 236 | } 237 | 238 | return str 239 | 240 | } 241 | 242 | func (fd *RDOSFileDescriptor) NameUnadorned() string { 243 | 244 | str := "" 245 | for i := 0; i < RDOS_NAME_LENGTH; i++ { 246 | 247 | ch := rune(fd.data[i] & 127) 248 | if ch == 0 { 249 | break 250 | } 251 | str += string(ch) 252 | 253 | } 254 | 255 | return strings.TrimSpace(str) 256 | 257 | } 258 | 259 | func (fd RDOSFileDescriptor) NumSectors() int { 260 | return int(fd.data[25]) 261 | } 262 | 263 | func (fd RDOSFileDescriptor) LoadAddress() int { 264 | return int(fd.data[26]) + 256*int(fd.data[27]) 265 | } 266 | 267 | func (fd RDOSFileDescriptor) StartSector() int { 268 | return int(fd.data[30]) + 256*int(fd.data[31]) 269 | } 270 | 271 | func (fd RDOSFileDescriptor) Length() int { 272 | return int(fd.data[28]) + 256*int(fd.data[29]) 273 | } 274 | 275 | func (dsk *DSKWrapper) RDOSGetCatalog(pattern string) ([]*RDOSFileDescriptor, error) { 276 | 277 | pattern = strings.Replace(pattern, ".", "[.]", -1) 278 | pattern = strings.Replace(pattern, "*", ".*", -1) 279 | pattern = strings.Replace(pattern, "?", ".", -1) 280 | 281 | rx := regexp.MustCompile("(?i)" + pattern) 282 | 283 | var files = make([]*RDOSFileDescriptor, 0) 284 | 285 | d := make([]byte, 0) 286 | 287 | for s := 0; s < RDOS_CATALOG_LENGTH; s++ { 288 | dsk.SetTrack(1) 289 | dsk.SetSector(s) 290 | chunk := dsk.Read() 291 | d = append(d, chunk...) 
292 | } 293 | 294 | var dirPtr int 295 | for i := 0; i < RDOS_CATALOG_LENGTH*RDOS_ENTRY_LENGTH; i++ { 296 | entry := &RDOSFileDescriptor{} 297 | entry.SetData(d[dirPtr : dirPtr+RDOS_ENTRY_LENGTH]) 298 | 299 | dirPtr += RDOS_ENTRY_LENGTH 300 | 301 | if entry.IsUnused() { 302 | break 303 | } 304 | 305 | if !entry.IsDeleted() && rx.MatchString(entry.NameUnadorned()) { 306 | files = append(files, entry) 307 | } 308 | 309 | } 310 | 311 | return files, nil 312 | 313 | } 314 | 315 | func (dsk *DSKWrapper) RDOSUsedBitmap() ([]bool, error) { 316 | 317 | spt := dsk.RDOSFormat.Spec().SectorMax 318 | activeSectors := spt * 35 319 | 320 | used := make([]bool, activeSectors) 321 | 322 | files, err := dsk.RDOSGetCatalog("*") 323 | if err != nil { 324 | return used, err 325 | } 326 | 327 | for _, file := range files { 328 | 329 | length := file.NumSectors() 330 | start := file.StartSector() 331 | if start+length > activeSectors { 332 | continue // file is bad 333 | } 334 | 335 | for block := start; block < start+length; block++ { 336 | used[block] = true 337 | } 338 | 339 | } 340 | 341 | return used, nil 342 | 343 | } 344 | 345 | func (dsk *DSKWrapper) RDOSReadFile(file *RDOSFileDescriptor) ([]byte, error) { 346 | 347 | spt := dsk.RDOSFormat.Spec().SectorMax 348 | activeSectors := spt * 35 349 | 350 | length := file.NumSectors() 351 | start := file.StartSector() 352 | 353 | // If file is damaged return nothing 354 | if start+length > activeSectors { 355 | return []byte(nil), nil 356 | } 357 | 358 | block := start 359 | data := make([]byte, 0) 360 | for block < start+length && len(data) < file.Length() { 361 | 362 | track := block / spt 363 | sector := block % spt 364 | 365 | dsk.SetTrack(track) 366 | dsk.SetSector(sector) 367 | 368 | chunk := dsk.Read() 369 | needed := file.Length() - len(data) 370 | if needed >= 256 { 371 | data = append(data, chunk...) 372 | } else { 373 | data = append(data, chunk[:needed]...) 
374 | } 375 | 376 | block++ 377 | 378 | } 379 | 380 | return data, nil 381 | 382 | } 383 | -------------------------------------------------------------------------------- /ingestor.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/hex" 5 | "fmt" 6 | "os" 7 | "path" 8 | "path/filepath" 9 | "regexp" 10 | "runtime" 11 | "runtime/debug" 12 | "sync" 13 | "time" 14 | 15 | "strings" 16 | 17 | "crypto/md5" 18 | 19 | "github.com/paleotronic/diskm8/disk" 20 | "github.com/paleotronic/diskm8/loggy" 21 | "github.com/paleotronic/diskm8/panic" 22 | ) 23 | 24 | var diskRegex = regexp.MustCompile("(?i)[.](po|do|dsk)$") 25 | 26 | func processFile(path string, info os.FileInfo, err error) error { 27 | if err != nil { 28 | loggy.Get(0).Errorf(err.Error()) 29 | return err 30 | } 31 | 32 | if diskRegex.MatchString(path) { 33 | 34 | incoming <- path 35 | 36 | fmt.Printf("\rIngested: %d volumes ...", processed) 37 | 38 | } 39 | 40 | return nil 41 | } 42 | 43 | const loaderWorkers = 8 44 | 45 | var incoming chan string 46 | var processed int 47 | var errorcount int 48 | var indisk map[disk.DiskFormat]int 49 | var outdisk map[disk.DiskFormat]int 50 | var cm sync.Mutex 51 | 52 | func init() { 53 | indisk = make(map[disk.DiskFormat]int) 54 | outdisk = make(map[disk.DiskFormat]int) 55 | } 56 | 57 | func in(f disk.DiskFormat) { 58 | cm.Lock() 59 | indisk[f] = indisk[f] + 1 60 | cm.Unlock() 61 | } 62 | 63 | func out(f disk.DiskFormat) { 64 | cm.Lock() 65 | outdisk[f] = outdisk[f] + 1 66 | cm.Unlock() 67 | } 68 | 69 | func walk(dir string) { 70 | 71 | start := time.Now() 72 | 73 | incoming = make(chan string, 16) 74 | indisk = make(map[disk.DiskFormat]int) 75 | outdisk = make(map[disk.DiskFormat]int) 76 | 77 | var wg sync.WaitGroup 78 | var s sync.Mutex 79 | 80 | for i := 0; i < loaderWorkers; i++ { 81 | wg.Add(1) 82 | go func(i int) { 83 | 84 | id := 1 + i 85 | l := loggy.Get(id) 86 | 87 | for filename := range 
// resolvePathfilters turns the user-supplied path filters into regular
// expressions that select fingerprint files stored beneath base.
//
// Each filter is made absolute and must exist on disk; filters that cannot
// be resolved or stat'ed are skipped. A directory filter yields an
// expression matching base/<dir>/<pattern>, where pattern is a glob ("." is
// literal, "?" is one character, "*" is one or more characters). A file
// filter yields an expression matching base/<dir>/<x>_<y>_<z>_<md5>.fgp,
// where md5 is the hex MD5 of the file's base name.
//
// All filesystem-derived parts are escaped with regexp.QuoteMeta so that
// characters such as "(", "+", "[" or "." in a path are matched literally
// instead of being interpreted (or rejected) as regular-expression syntax.
func resolvePathfilters(base string, pathfilter []string, pattern string) []*regexp.Regexp {

	// Glob -> regexp for the file-name part.
	tmp := strings.Replace(pattern, ".", "[.]", -1)
	tmp = strings.Replace(tmp, "?", ".", -1)
	tmp = strings.Replace(tmp, "*", ".+", -1)
	tmp += "$"

	// The base prefix is the same for every filter.
	prefix := regexp.QuoteMeta(strings.Replace(base, "\\", "/", -1)) + "/"

	// pathfilter either contains filenames or a pattern (eg. if it was quoted)
	var out []*regexp.Regexp
	for _, p := range pathfilter {

		if runtime.GOOS == "windows" {
			p = strings.Replace(p, "\\", "/", -1)
		}

		p, e := filepath.Abs(p)
		if e != nil {
			continue
		}

		// path is okay and now absolute
		info, e := os.Stat(p)
		if e != nil {
			continue
		}

		if runtime.GOOS == "windows" {
			// Drop the drive colon and normalise separators.
			p = strings.Replace(p, ":", "", -1)
			p = strings.Replace(p, "\\", "/", -1)
		}

		var realpath string
		if info.IsDir() {
			realpath = prefix + regexp.QuoteMeta(strings.Trim(p, "/")) + "/" + tmp
		} else {
			// file
			b := strings.Trim(filepath.Base(p), " /")
			s := md5.Sum([]byte(b))
			// filepath.Dir returns host separators; convert back to "/" so
			// the expression matches the slash-normalised candidate paths.
			dir := strings.Trim(filepath.ToSlash(filepath.Dir(p)), "/")
			realpath = prefix + regexp.QuoteMeta(dir) + "/.+_.+_.+_" + hex.EncodeToString(s[:]) + "[.]fgp$"
		}

		out = append(out, regexp.MustCompile(realpath))

	}

	return out

}
246 | filepath.Walk(base, processPatternPath) 247 | 248 | fexp := resolvePathfilters(base, filters, pattern) 249 | 250 | if len(fexp) > 0 { 251 | out2 := make([]string, 0) 252 | for _, p := range out { 253 | 254 | if runtime.GOOS == "windows" { 255 | p = strings.Replace(p, "\\", "/", -1) 256 | } 257 | 258 | for _, rxp := range fexp { 259 | //fmt.Printf("Match [%s]\n", p) 260 | if rxp.MatchString(p) { 261 | out2 = append(out2, p) 262 | //fmt.Printf("Match regexp: %s\n", p) 263 | break 264 | } 265 | } 266 | } 267 | //fmt.Printf("%d returns\n", len(out2)) 268 | return (len(out2) > 0), out2 269 | } 270 | 271 | //fmt.Printf("%d returns\n", len(out)) 272 | 273 | return found, out 274 | 275 | } 276 | 277 | func analyze(id int, filename string) (*Disk, error) { 278 | 279 | l := loggy.Get(id) 280 | 281 | var err error 282 | var dsk *disk.DSKWrapper 283 | var dskInfo Disk = Disk{} 284 | 285 | dskInfo.Filename = path.Base(filename) 286 | 287 | if abspath, e := filepath.Abs(filename); e == nil { 288 | filename = abspath 289 | } 290 | 291 | dskInfo.FullPath = path.Clean(filename) 292 | 293 | l.Logf("Reading disk image from file source %s", filename) 294 | //fmt.Printf("Processing %s\n", filename) 295 | //fmt.Print(".") 296 | 297 | dsk, err = disk.NewDSKWrapper(defNibbler, filename) 298 | 299 | if err != nil { 300 | l.Errorf("Disk read failed: %s", err) 301 | return &dskInfo, err 302 | } 303 | 304 | if dsk.Format.ID == disk.DF_DOS_SECTORS_13 || dsk.Format.ID == disk.DF_DOS_SECTORS_16 { 305 | isADOS, _, _ := dsk.IsAppleDOS() 306 | if !isADOS { 307 | dsk.Format.ID = disk.DF_NONE 308 | dsk.Layout = disk.SectorOrderDOS33 309 | } 310 | } 311 | // fmt.Printf("%s: IsAppleDOS=%v, Format=%s, Layout=%d\n", path.Base(filename), isADOS, Format, Layout) 312 | 313 | l.Log("Load is OK.") 314 | 315 | dskInfo.SHA256 = dsk.ChecksumDisk() 316 | l.Logf("SHA256 is %s", dskInfo.SHA256) 317 | 318 | dskInfo.Format = dsk.Format.String() 319 | dskInfo.FormatID = dsk.Format 320 | l.Logf("Format is %s", 
dskInfo.Format) 321 | 322 | l.Debugf("TOSO MAGIC: %v", hex.EncodeToString(dsk.Data[:32])) 323 | 324 | t, s := dsk.HuntVTOC(35, 13) 325 | l.Logf("Hunt VTOC says: %d, %d", t, s) 326 | 327 | // Check if it exists 328 | 329 | in(dsk.Format) 330 | 331 | dskInfo.IngestMode = *ingestMode 332 | 333 | switch dsk.Format.ID { 334 | case disk.DF_DOS_SECTORS_16: 335 | analyzeDOS16(id, dsk, &dskInfo) 336 | case disk.DF_DOS_SECTORS_13: 337 | analyzeDOS13(id, dsk, &dskInfo) 338 | case disk.DF_PRODOS_400KB: 339 | analyzePRODOS800(id, dsk, &dskInfo) 340 | case disk.DF_PRODOS_800KB: 341 | analyzePRODOS800(id, dsk, &dskInfo) 342 | case disk.DF_PRODOS: 343 | analyzePRODOS16(id, dsk, &dskInfo) 344 | case disk.DF_RDOS_3: 345 | analyzeRDOS(id, dsk, &dskInfo) 346 | case disk.DF_RDOS_32: 347 | analyzeRDOS(id, dsk, &dskInfo) 348 | case disk.DF_RDOS_33: 349 | analyzeRDOS(id, dsk, &dskInfo) 350 | case disk.DF_PASCAL: 351 | analyzePASCAL(id, dsk, &dskInfo) 352 | default: 353 | analyzeNONE(id, dsk, &dskInfo) 354 | } 355 | 356 | return &dskInfo, nil 357 | 358 | } 359 | -------------------------------------------------------------------------------- /fuzzyblocks.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "os/signal" 7 | "sync" 8 | ) 9 | 10 | const EMPTYSECTOR = "5341e6b2646979a70e57653007a1f310169421ec9bdd9f1a5648f75ade005af1" 11 | 12 | func GetAllDiskSectors(pattern string, pathfilter []string) map[string]DiskSectors { 13 | 14 | cache := make(map[string]DiskSectors) 15 | 16 | exists, matches := existsPattern(*baseName, pathfilter, pattern) 17 | if !exists { 18 | return cache 19 | } 20 | 21 | workchan := make(chan string, 100) 22 | var s sync.Mutex 23 | var wg sync.WaitGroup 24 | 25 | for i := 0; i < ingestWorkers; i++ { 26 | wg.Add(1) 27 | go func() { 28 | for m := range workchan { 29 | item := &Disk{} 30 | if err := item.ReadFromFile(m); err == nil { 31 | 32 | // 33 | chunk := 
append(item.ActiveSectors, item.InactiveSectors...) 34 | tmp := make(DiskSectors, 0) 35 | for _, v := range chunk { 36 | if v.SHA256 != EMPTYSECTOR { 37 | tmp = append(tmp, v) 38 | } else { 39 | fmt.Printf("%s: throw away zero sector T%d,S%d\n", item.Filename, v.Track, v.Sector) 40 | } 41 | } 42 | 43 | // Load cache 44 | s.Lock() 45 | cache[item.FullPath] = tmp 46 | s.Unlock() 47 | 48 | } 49 | } 50 | wg.Done() 51 | }() 52 | } 53 | 54 | var lastPc int = -1 55 | for i, m := range matches { 56 | 57 | workchan <- m 58 | 59 | pc := int(100 * float64(i) / float64(len(matches))) 60 | 61 | if pc != lastPc { 62 | fmt.Print("\r") 63 | os.Stderr.WriteString(fmt.Sprintf("Caching disk sector data... %d%% ", pc)) 64 | } 65 | 66 | lastPc = pc 67 | } 68 | close(workchan) 69 | 70 | wg.Wait() 71 | 72 | return cache 73 | } 74 | 75 | func GetActiveDiskSectors(pattern string, pathfilter []string) map[string]DiskSectors { 76 | 77 | cache := make(map[string]DiskSectors) 78 | 79 | exists, matches := existsPattern(*baseName, pathfilter, pattern) 80 | if !exists { 81 | return cache 82 | } 83 | 84 | workchan := make(chan string, 100) 85 | var s sync.Mutex 86 | var wg sync.WaitGroup 87 | 88 | for i := 0; i < ingestWorkers; i++ { 89 | wg.Add(1) 90 | go func() { 91 | for m := range workchan { 92 | item := &Disk{} 93 | if err := item.ReadFromFile(m); err == nil { 94 | 95 | // Load cache 96 | s.Lock() 97 | cache[item.FullPath] = item.ActiveSectors 98 | s.Unlock() 99 | 100 | } 101 | } 102 | wg.Done() 103 | }() 104 | } 105 | 106 | var lastPc int = -1 107 | for i, m := range matches { 108 | 109 | workchan <- m 110 | 111 | pc := int(100 * float64(i) / float64(len(matches))) 112 | 113 | if pc != lastPc { 114 | fmt.Print("\r") 115 | os.Stderr.WriteString(fmt.Sprintf("Caching disk sector data... 
%d%% ", pc)) 116 | } 117 | 118 | lastPc = pc 119 | } 120 | close(workchan) 121 | 122 | wg.Wait() 123 | 124 | return cache 125 | } 126 | 127 | func GetSectorMap(d DiskSectors) map[string]*DiskSector { 128 | 129 | out := make(map[string]*DiskSector) 130 | for _, v := range d { 131 | out[fmt.Sprintf("T%d,S%d", v.Track, v.Sector)] = v 132 | } 133 | return out 134 | 135 | } 136 | 137 | type SectorOverlapRecord struct { 138 | same map[string]map[*DiskSector]*DiskSector 139 | percent map[string]float64 140 | missing map[string][]*DiskSector 141 | extras map[string][]*DiskSector 142 | } 143 | 144 | func (f *SectorOverlapRecord) Remove(key string) { 145 | delete(f.same, key) 146 | delete(f.percent, key) 147 | delete(f.missing, key) 148 | delete(f.extras, key) 149 | } 150 | 151 | func (f *SectorOverlapRecord) IsSubsetOf(filename string) bool { 152 | 153 | // f is a subset if: 154 | // missing == 0 155 | // extra > 0 156 | 157 | if _, ok := f.same[filename]; !ok { 158 | return false 159 | } 160 | 161 | return len(f.extras[filename]) > 0 && len(f.missing[filename]) == 0 162 | 163 | } 164 | 165 | func (f *SectorOverlapRecord) IsSupersetOf(filename string) bool { 166 | 167 | // f is a superset if: 168 | // missing > 0 169 | // extra == 0 170 | 171 | if _, ok := f.same[filename]; !ok { 172 | return false 173 | } 174 | 175 | return len(f.extras[filename]) == 0 && len(f.missing[filename]) > 0 176 | 177 | } 178 | 179 | func CompareSectors(d, b DiskSectors, r *SectorOverlapRecord, key string) float64 { 180 | 181 | var sameSectors float64 182 | var missingSectors float64 183 | var extraSectors float64 184 | 185 | var dmap = GetSectorMap(d) 186 | var bmap = GetSectorMap(b) 187 | 188 | for fileCk, info := range dmap { 189 | 190 | binfo, bEx := bmap[fileCk] 191 | 192 | if bEx && info.SHA256 == binfo.SHA256 { 193 | sameSectors += 1 194 | if r.same[key] == nil { 195 | r.same[key] = make(map[*DiskSector]*DiskSector) 196 | } 197 | 198 | r.same[key][binfo] = info 199 | } else { 200 | 
missingSectors += 1 201 | if r.missing[key] == nil { 202 | r.missing[key] = make([]*DiskSector, 0) 203 | } 204 | r.missing[key] = append(r.missing[key], info) 205 | } 206 | 207 | } 208 | 209 | for fileCk, info := range bmap { 210 | 211 | _, dEx := dmap[fileCk] 212 | 213 | if !dEx { 214 | extraSectors += 1 215 | // file match 216 | if r.extras[key] == nil { 217 | r.extras[key] = make([]*DiskSector, 0) 218 | } 219 | //fmt.Printf("*** %s: %s -> %s\n", b.Filename, binfo.Filename, info.Filename) 220 | r.extras[key] = append(r.extras[key], info) 221 | } 222 | 223 | } 224 | 225 | if (sameSectors + extraSectors + missingSectors) == 0 { 226 | return 0 227 | } 228 | 229 | // return sameSectors / dTotal, sameSectors / bTotal, diffSectors / dTotal, diffSectors / btotal 230 | return sameSectors / (sameSectors + extraSectors + missingSectors) 231 | 232 | } 233 | 234 | // Actual fuzzy file match report 235 | func CollectSectorOverlapsAboveThreshold(t float64, pathfilter []string, ff func(pattern string, pathfilter []string) map[string]DiskSectors) map[string]*SectorOverlapRecord { 236 | 237 | filerecords := ff("*_*_*_*.fgp", pathfilter) 238 | 239 | results := make(map[string]*SectorOverlapRecord) 240 | 241 | workchan := make(chan string, 100) 242 | var wg sync.WaitGroup 243 | var s sync.Mutex 244 | 245 | c := make(chan os.Signal, 1) 246 | signal.Notify(c, os.Interrupt) 247 | 248 | for i := 0; i < processWorkers; i++ { 249 | wg.Add(1) 250 | go func() { 251 | for m := range workchan { 252 | 253 | v := &SectorOverlapRecord{ 254 | same: make(map[string]map[*DiskSector]*DiskSector), 255 | percent: make(map[string]float64), 256 | missing: make(map[string][]*DiskSector), 257 | extras: make(map[string][]*DiskSector), 258 | } 259 | 260 | d := filerecords[m] 261 | 262 | for k, b := range filerecords { 263 | if k == m { 264 | continue // dont compare ourselves 265 | } 266 | // ok good to compare -- only keep if we need our threshold 267 | 268 | if closeness := CompareSectors(d, b, v, k); 
closeness < t { 269 | v.Remove(k) 270 | } else { 271 | v.percent[k] = closeness 272 | } 273 | } 274 | 275 | // since we delete < threshold, only add if we have any result 276 | if len(v.percent) > 0 { 277 | //os.Stderr.WriteString("\r\nAdded file: " + m + "\r\n\r\n") 278 | s.Lock() 279 | results[m] = v 280 | s.Unlock() 281 | } 282 | 283 | } 284 | wg.Done() 285 | }() 286 | } 287 | 288 | // feed data in 289 | var lastPc int = -1 290 | var i int 291 | for k, _ := range filerecords { 292 | 293 | if len(c) > 0 { 294 | sig := <-c 295 | if sig == os.Interrupt { 296 | close(c) 297 | os.Stderr.WriteString("\r\nInterrupted. Waiting for workers to stop.\r\n\r\n") 298 | break 299 | } 300 | } 301 | 302 | workchan <- k 303 | 304 | pc := int(100 * float64(i) / float64(len(filerecords))) 305 | 306 | if pc != lastPc { 307 | fmt.Print("\r") 308 | os.Stderr.WriteString(fmt.Sprintf("Processing sectors data... %d%% ", pc)) 309 | } 310 | 311 | lastPc = pc 312 | i++ 313 | } 314 | 315 | close(workchan) 316 | wg.Wait() 317 | 318 | return results 319 | 320 | } 321 | 322 | // Actual fuzzy file match report 323 | func CollectSectorSubsets(pathfilter []string, ff func(pattern string, pathfilter []string) map[string]DiskSectors) map[string]*SectorOverlapRecord { 324 | 325 | filerecords := ff("*_*_*_*.fgp", pathfilter) 326 | 327 | results := make(map[string]*SectorOverlapRecord) 328 | 329 | workchan := make(chan string, 100) 330 | var wg sync.WaitGroup 331 | var s sync.Mutex 332 | 333 | c := make(chan os.Signal, 1) 334 | signal.Notify(c, os.Interrupt) 335 | 336 | for i := 0; i < processWorkers; i++ { 337 | wg.Add(1) 338 | go func() { 339 | for m := range workchan { 340 | 341 | v := &SectorOverlapRecord{ 342 | same: make(map[string]map[*DiskSector]*DiskSector), 343 | percent: make(map[string]float64), 344 | missing: make(map[string][]*DiskSector), 345 | extras: make(map[string][]*DiskSector), 346 | } 347 | 348 | d := filerecords[m] 349 | 350 | for k, b := range filerecords { 351 | if k == m { 352 
| continue // dont compare ourselves 353 | } 354 | // ok good to compare -- only keep if we need our threshold 355 | 356 | closeness := CompareSectors(d, b, v, k) 357 | 358 | if !v.IsSubsetOf(k) { 359 | v.Remove(k) 360 | } else { 361 | v.percent[k] = closeness 362 | } 363 | } 364 | 365 | // since we delete < threshold, only add if we have any result 366 | if len(v.percent) > 0 { 367 | //os.Stderr.WriteString("\r\nAdded file: " + m + "\r\n\r\n") 368 | s.Lock() 369 | results[m] = v 370 | s.Unlock() 371 | } 372 | 373 | } 374 | wg.Done() 375 | }() 376 | } 377 | 378 | // feed data in 379 | var lastPc int = -1 380 | var i int 381 | for k, _ := range filerecords { 382 | 383 | if len(c) > 0 { 384 | sig := <-c 385 | if sig == os.Interrupt { 386 | close(c) 387 | os.Stderr.WriteString("\r\nInterrupted. Waiting for workers to stop.\r\n\r\n") 388 | break 389 | } 390 | } 391 | 392 | workchan <- k 393 | 394 | pc := int(100 * float64(i) / float64(len(filerecords))) 395 | 396 | if pc != lastPc { 397 | fmt.Print("\r") 398 | os.Stderr.WriteString(fmt.Sprintf("Processing sectors data... 
// inList reports whether item equals any element of list, ignoring case.
func inList(item string, list []string) bool {
	for _, v := range list {
		// EqualFold compares without allocating lower-cased copies.
		if strings.EqualFold(v, item) {
			return true
		}
	}
	return false
}
%d%% ", pc)) 72 | } 73 | 74 | lastPc = pc 75 | } 76 | close(workchan) 77 | 78 | wg.Wait() 79 | 80 | return cache 81 | } 82 | 83 | type FileOverlapRecord struct { 84 | files map[string]map[*DiskFile]*DiskFile 85 | percent map[string]float64 86 | missing map[string][]*DiskFile 87 | extras map[string][]*DiskFile 88 | } 89 | 90 | func (f *FileOverlapRecord) Remove(key string) { 91 | delete(f.files, key) 92 | delete(f.percent, key) 93 | delete(f.missing, key) 94 | delete(f.extras, key) 95 | } 96 | 97 | func (f *FileOverlapRecord) IsSubsetOf(filename string) bool { 98 | 99 | // f is a subset if: 100 | // missing == 0 101 | // extra > 0 102 | 103 | if _, ok := f.files[filename]; !ok { 104 | return false 105 | } 106 | 107 | return len(f.extras[filename]) > 0 && len(f.missing[filename]) == 0 108 | 109 | } 110 | 111 | func (f *FileOverlapRecord) IsSupersetOf(filename string) bool { 112 | 113 | // f is a superset if: 114 | // missing > 0 115 | // extra == 0 116 | 117 | if _, ok := f.files[filename]; !ok { 118 | return false 119 | } 120 | 121 | return len(f.extras[filename]) == 0 && len(f.missing[filename]) > 0 122 | 123 | } 124 | 125 | // Actual fuzzy file match report 126 | func CollectFilesOverlapsAboveThreshold(t float64, pathfilter []string) map[string]*FileOverlapRecord { 127 | 128 | filerecords := GetAllFiles("*_*_*_*.fgp", pathfilter) 129 | 130 | results := make(map[string]*FileOverlapRecord) 131 | 132 | workchan := make(chan string, 100) 133 | var wg sync.WaitGroup 134 | var s sync.Mutex 135 | 136 | c := make(chan os.Signal, 1) 137 | signal.Notify(c, os.Interrupt) 138 | 139 | for i := 0; i < processWorkers; i++ { 140 | wg.Add(1) 141 | go func() { 142 | for m := range workchan { 143 | 144 | v := &FileOverlapRecord{ 145 | files: make(map[string]map[*DiskFile]*DiskFile), 146 | percent: make(map[string]float64), 147 | missing: make(map[string][]*DiskFile), 148 | extras: make(map[string][]*DiskFile), 149 | } 150 | 151 | d := filerecords[m] 152 | 153 | for k, b := range 
filerecords { 154 | if k == m { 155 | continue // dont compare ourselves 156 | } 157 | // ok good to compare -- only keep if we need our threshold 158 | 159 | if closeness := CompareCatalogs(d, b, v, k); closeness < t { 160 | v.Remove(k) 161 | } else { 162 | v.percent[k] = closeness 163 | } 164 | } 165 | 166 | // since we delete < threshold, only add if we have any result 167 | if len(v.percent) > 0 { 168 | //os.Stderr.WriteString("\r\nAdded file: " + m + "\r\n\r\n") 169 | s.Lock() 170 | results[m] = v 171 | s.Unlock() 172 | } 173 | 174 | } 175 | wg.Done() 176 | }() 177 | } 178 | 179 | // feed data in 180 | var lastPc int = -1 181 | var i int 182 | for k, _ := range filerecords { 183 | 184 | if len(c) > 0 { 185 | sig := <-c 186 | if sig == os.Interrupt { 187 | close(c) 188 | os.Stderr.WriteString("\r\nInterrupted. Waiting for workers to stop.\r\n\r\n") 189 | break 190 | } 191 | } 192 | 193 | workchan <- k 194 | 195 | pc := int(100 * float64(i) / float64(len(filerecords))) 196 | 197 | if pc != lastPc { 198 | fmt.Print("\r") 199 | os.Stderr.WriteString(fmt.Sprintf("Processing files data... 
%d%% ", pc)) 200 | } 201 | 202 | lastPc = pc 203 | i++ 204 | } 205 | 206 | close(workchan) 207 | wg.Wait() 208 | 209 | return results 210 | 211 | } 212 | 213 | func GetCatalogMap(d DiskCatalog) map[string]*DiskFile { 214 | 215 | out := make(map[string]*DiskFile) 216 | for _, v := range d { 217 | out[v.SHA256] = v 218 | } 219 | return out 220 | 221 | } 222 | 223 | func CompareCatalogs(d, b DiskCatalog, r *FileOverlapRecord, key string) float64 { 224 | 225 | var sameFiles float64 226 | var missingFiles float64 227 | var extraFiles float64 228 | 229 | var dmap = GetCatalogMap(d) 230 | var bmap = GetCatalogMap(b) 231 | 232 | for fileCk, info := range dmap { 233 | 234 | if info.Size == 0 && EXCLUDEZEROBYTE { 235 | continue 236 | } 237 | 238 | if info.Filename == "hello" && EXCLUDEHELLO { 239 | continue 240 | } 241 | 242 | binfo, bEx := bmap[fileCk] 243 | 244 | if bEx { 245 | sameFiles += 1 246 | // file match 247 | if r.files[key] == nil { 248 | r.files[key] = make(map[*DiskFile]*DiskFile) 249 | } 250 | //fmt.Printf("*** %s: %s -> %s\n", b.Filename, binfo.Filename, info.Filename) 251 | r.files[key][binfo] = info 252 | } else { 253 | missingFiles += 1 254 | // file match 255 | if r.missing[key] == nil { 256 | r.missing[key] = make([]*DiskFile, 0) 257 | } 258 | //fmt.Printf("*** %s: %s -> %s\n", b.Filename, binfo.Filename, info.Filename) 259 | r.missing[key] = append(r.missing[key], info) 260 | } 261 | 262 | } 263 | 264 | for fileCk, info := range bmap { 265 | 266 | if info.Size == 0 { 267 | continue 268 | } 269 | 270 | _, dEx := dmap[fileCk] 271 | 272 | if !dEx { 273 | extraFiles += 1 274 | // file match 275 | if r.extras[key] == nil { 276 | r.extras[key] = make([]*DiskFile, 0) 277 | } 278 | //fmt.Printf("*** %s: %s -> %s\n", b.Filename, binfo.Filename, info.Filename) 279 | r.extras[key] = append(r.extras[key], info) 280 | } 281 | 282 | } 283 | 284 | if (sameFiles + extraFiles + missingFiles) == 0 { 285 | return 0 286 | } 287 | 288 | // return sameSectors / dTotal, 
sameSectors / bTotal, diffSectors / dTotal, diffSectors / btotal 289 | return sameFiles / (sameFiles + extraFiles + missingFiles) 290 | 291 | } 292 | 293 | func CollectFileSubsets(pathfilter []string) map[string]*FileOverlapRecord { 294 | 295 | filerecords := GetAllFiles("*_*_*_*.fgp", pathfilter) 296 | 297 | results := make(map[string]*FileOverlapRecord) 298 | 299 | workchan := make(chan string, 100) 300 | var wg sync.WaitGroup 301 | var s sync.Mutex 302 | 303 | c := make(chan os.Signal, 1) 304 | signal.Notify(c, os.Interrupt) 305 | 306 | for i := 0; i < processWorkers; i++ { 307 | wg.Add(1) 308 | go func() { 309 | for m := range workchan { 310 | 311 | v := &FileOverlapRecord{ 312 | files: make(map[string]map[*DiskFile]*DiskFile), 313 | percent: make(map[string]float64), 314 | missing: make(map[string][]*DiskFile), 315 | extras: make(map[string][]*DiskFile), 316 | } 317 | 318 | d := filerecords[m] 319 | 320 | for k, b := range filerecords { 321 | if k == m { 322 | continue // dont compare ourselves 323 | } 324 | // ok good to compare -- only keep if we need our threshold 325 | 326 | closeness := CompareCatalogs(d, b, v, k) 327 | if !v.IsSubsetOf(k) { 328 | v.Remove(k) 329 | } else { 330 | v.percent[k] = closeness 331 | } 332 | } 333 | 334 | // since we delete < threshold, only add if we have any result 335 | if len(v.percent) > 0 { 336 | //os.Stderr.WriteString("\r\nAdded file: " + m + "\r\n\r\n") 337 | s.Lock() 338 | results[m] = v 339 | s.Unlock() 340 | } 341 | 342 | } 343 | wg.Done() 344 | }() 345 | } 346 | 347 | // feed data in 348 | var lastPc int = -1 349 | var i int 350 | for k, _ := range filerecords { 351 | 352 | if len(c) > 0 { 353 | sig := <-c 354 | if sig == os.Interrupt { 355 | close(c) 356 | os.Stderr.WriteString("\r\nInterrupted. 
Waiting for workers to stop.\r\n\r\n") 357 | break 358 | } 359 | } 360 | 361 | workchan <- k 362 | 363 | pc := int(100 * float64(i) / float64(len(filerecords))) 364 | 365 | if pc != lastPc { 366 | fmt.Print("\r") 367 | os.Stderr.WriteString(fmt.Sprintf("Processing files data... %d%% ", pc)) 368 | } 369 | 370 | lastPc = pc 371 | i++ 372 | } 373 | 374 | close(workchan) 375 | wg.Wait() 376 | 377 | return results 378 | 379 | } 380 | 381 | func CollectFilesOverlapsCustom(keep func(d1, d2 string, v *FileOverlapRecord) bool, pathfilter []string) map[string]*FileOverlapRecord { 382 | 383 | filerecords := GetAllFiles("*_*_*_*.fgp", pathfilter) 384 | 385 | results := make(map[string]*FileOverlapRecord) 386 | 387 | workchan := make(chan string, 100) 388 | var wg sync.WaitGroup 389 | var s sync.Mutex 390 | 391 | c := make(chan os.Signal, 1) 392 | signal.Notify(c, os.Interrupt) 393 | 394 | for i := 0; i < processWorkers; i++ { 395 | wg.Add(1) 396 | go func() { 397 | for m := range workchan { 398 | 399 | v := &FileOverlapRecord{ 400 | files: make(map[string]map[*DiskFile]*DiskFile), 401 | percent: make(map[string]float64), 402 | missing: make(map[string][]*DiskFile), 403 | extras: make(map[string][]*DiskFile), 404 | } 405 | 406 | d := filerecords[m] 407 | 408 | for k, b := range filerecords { 409 | if k == m { 410 | continue // dont compare ourselves 411 | } 412 | // ok good to compare -- only keep if we need our threshold 413 | closeness := CompareCatalogs(d, b, v, k) 414 | 415 | if !keep(m, k, v) { 416 | v.Remove(k) 417 | } else { 418 | v.percent[k] = closeness 419 | } 420 | } 421 | 422 | // since we delete < threshold, only add if we have any result 423 | if len(v.files) > 0 { 424 | //os.Stderr.WriteString("\r\nAdded file: " + m + "\r\n\r\n") 425 | s.Lock() 426 | results[m] = v 427 | s.Unlock() 428 | } 429 | 430 | } 431 | wg.Done() 432 | }() 433 | } 434 | 435 | // feed data in 436 | var lastPc int = -1 437 | var i int 438 | for k, _ := range filerecords { 439 | 440 | if 
len(c) > 0 { 441 | sig := <-c 442 | if sig == os.Interrupt { 443 | close(c) 444 | os.Stderr.WriteString("\r\nInterrupted. Waiting for workers to stop.\r\n\r\n") 445 | break 446 | } 447 | } 448 | 449 | workchan <- k 450 | 451 | pc := int(100 * float64(i) / float64(len(filerecords))) 452 | 453 | if pc != lastPc { 454 | fmt.Print("\r") 455 | os.Stderr.WriteString(fmt.Sprintf("Processing files data... %d%% ", pc)) 456 | } 457 | 458 | lastPc = pc 459 | i++ 460 | } 461 | 462 | close(workchan) 463 | wg.Wait() 464 | 465 | return results 466 | 467 | } 468 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | /* 4 | DiskM8 is an open source offshoot of the file handling code from the Octalyzer 5 | project. 6 | 7 | It provides some command line tools for manipulating Apple // disk images, and 8 | some work in progress reporting tools to ingest large quantities of files, 9 | catalog them and detect duplicates. 10 | 11 | The code currently needs a lot of refactoring and cleanup, which we will be working 12 | through as time goes by. 13 | */ 14 | 15 | import ( 16 | "flag" 17 | "fmt" 18 | "io/ioutil" 19 | "os" 20 | "path" 21 | "path/filepath" 22 | "runtime" 23 | "runtime/debug" 24 | "strings" 25 | "time" 26 | 27 | "github.com/paleotronic/diskm8/disk" 28 | "github.com/paleotronic/diskm8/loggy" 29 | "github.com/paleotronic/diskm8/panic" 30 | ) 31 | 32 | func usage() { 33 | fmt.Printf(`%s 34 | 35 | Tool checks for duplicate or similar apple ][ disks, specifically those 36 | with %d bytes size. 
// binpath returns the DiskM8 base directory: the value of the USERPROFILE
// environment variable on Windows, or HOME on every other platform, with
// "/DiskM8" appended.
func binpath() string {

	homeVar := "HOME"
	if runtime.GOOS == "windows" {
		homeVar = "USERPROFILE"
	}

	return os.Getenv(homeVar) + "/DiskM8"

}
flag.Bool("all-sector-partial", false, "Run partial sector match (all) against all disks") 73 | var activeSectorPartial = flag.Bool("active-sector-partial", false, "Run partial sector match (active only) against all disks") 74 | var allFileSubset = flag.Bool("all-file-subset", false, "Run subset file match against all disks") 75 | var activeSectorSubset = flag.Bool("active-sector-subset", false, "Run subset (active) sector match against all disks") 76 | var allSectorSubset = flag.Bool("all-sector-subset", false, "Run subset (non-zero) sector match against all disks") 77 | var filterPath = flag.Bool("select", false, "Select files for analysis or search based on file/dir/mask") 78 | var csvOut = flag.Bool("csv", false, "Output data to CSV format") 79 | var reportFile = flag.String("out", "", "Output file (empty for stdout)") 80 | var catDupes = flag.Bool("cat-dupes", false, "Run duplicate catalog report") 81 | var searchFilename = flag.String("search-filename", "", "Search database for file with name") 82 | var searchSHA = flag.String("search-sha", "", "Search database for file with checksum") 83 | var searchTEXT = flag.String("search-text", "", "Search database for file containing text") 84 | var forceIngest = flag.Bool("force", false, "Force re-ingest disks that already exist") 85 | var ingestMode = flag.Int("ingest-mode", 1, "Ingest mode:\n\t0=Fingerprints only\n\t1=Fingerprints + text\n\t2=Fingerprints + sector data\n\t3=All") 86 | var extract = flag.String("extract", "", "Extract files/disks matched in searches ('#'=extract disk, '@'=extract files)") 87 | var adornedCP = flag.Bool("adorned", true, "Extract files named similar to CP") 88 | var shell = flag.Bool("shell", false, "Start interactive mode") 89 | var shellBatch = flag.String("shell-batch", "", "Execute shell command(s) from file and exit") 90 | var withDisk = flag.String("with-disk", "", "Perform disk operation (-file-extract,-file-put,-file-delete)") 91 | var withPath = flag.String("with-path", "", 
// main parses the command line and dispatches to exactly one mode of
// operation. The modes are checked in the order below and the first one that
// applies wins:
//
//  1. -with-disk: a single disk operation (extract/put/mkdir/delete/catalog).
//  2. -shell-batch: run shell commands from a file or from stdin.
//  3. -shell: interactive shell.
//  4. -search-*/-dir: database searches and directory listing.
//  5. the report flags (subset, partial, dupes, ...).
//  6. no -ingest and no -query: interactive shell.
//  7. -ingest: walk a directory, or analyze a single disk image.
//
// Positional arguments are only collected (as cleaned paths) when -select or
// -shell is given.
func main() {

	runtime.GOMAXPROCS(8)

	flag.Parse()

	// The banner is suppressed for the scriptable modes.
	if *withDisk == "" && *shellBatch == "" {
		banner()
	}

	var filterpath []string

	if *filterPath || *shell {
		for _, v := range flag.Args() {
			filterpath = append(filterpath, filepath.Clean(v))
		}
	}

	//l.SILENT = !*logToFile
	loggy.ECHO = *verbose

	// Mode 1: one-shot disk operation driven through the shell command
	// processor. Exits 2 if the disk cannot be opened and 3 if no operation
	// flag was supplied.
	if *withDisk != "" {
		dsk, err := disk.NewDSKWrapper(defNibbler, *withDisk)
		if err != nil {
			os.Stderr.WriteString(err.Error())
			os.Exit(2)
		}
		commandVolumes[0] = dsk
		commandTarget = 0

		if *withPath != "" {
			shellProcess("prefix " + *withPath)
		}

		switch {
		case *fileExtract != "":
			shellProcess("extract " + *fileExtract)
		case *filePut != "":
			shellProcess("put " + *filePut)
		case *fileMkdir != "":
			shellProcess("mkdir " + *fileMkdir)
		case *fileDelete != "":
			shellProcess("delete " + *fileDelete)
		case *fileCatalog:
			shellProcess("cat ")
		default:
			os.Stderr.WriteString("Additional flag required")
			os.Exit(3)
		}

		// NOTE(review): the reason for this fixed 5 second pause is not
		// visible here — confirm whether it is still needed.
		time.Sleep(5 * time.Second)

		os.Exit(0)
	}

	// if *preCache {
	// 	x := GetAllFiles("*_*_*_*.fgp")
	// 	fmt.Println(len(x))
	// }

	// Mode 2: batch script. The special name "stdin" reads the commands from
	// standard input instead of a file.
	if *shellBatch != "" {
		var data []byte
		var err error
		if *shellBatch == "stdin" {
			data, err = ioutil.ReadAll(os.Stdin)
			if err != nil {
				os.Stderr.WriteString("Failed to read commands from stdin. Aborting")
				os.Exit(1)
			}
		} else {
			data, err = ioutil.ReadFile(*shellBatch)
			if err != nil {
				os.Stderr.WriteString("Failed to read commands from file. Aborting")
				os.Exit(1)
			}
		}
		lines := strings.Split(string(data), "\n")
		for i, l := range lines {
			r := shellProcess(l)
			// -1 is treated as a failed command: abort with exit code 2.
			if r == -1 {
				os.Stderr.WriteString(fmt.Sprintf("Script failed at line %d: %s\n", i+1, l))
				os.Exit(2)
			}
			// 999 is treated as a request to end the script.
			if r == 999 {
				os.Stderr.WriteString("Script terminated")
				return
			}
		}
		return
	}

	// Mode 3: interactive shell, optionally pre-loading the first
	// positional argument as a disk.
	if *shell {
		var dsk *disk.DSKWrapper
		var err error
		if len(filterpath) > 0 {
			fmt.Printf("Trying to load %s\n", filterpath[0])
			dsk, err = disk.NewDSKWrapper(defNibbler, filterpath[0])
			if err != nil {
				fmt.Println("Error: " + err.Error())
				os.Exit(1)
			}
		}
		shellDo(dsk)
		os.Exit(0)
	}

	// Report the number of extracted files when main returns.
	// NOTE(review): os.Exit does not run deferred functions, so this only
	// fires on the paths below that leave main with return (or fall off the
	// end), not on the ones that call os.Exit.
	defer func() {

		if fileExtractCounter > 0 {
			os.Stderr.WriteString(fmt.Sprintf("%d files were extracted\n", fileExtractCounter))
		}

	}()

	// Mode 4: searches and directory listing.
	if *searchFilename != "" {
		searchForFilename(*searchFilename, filterpath)
		return
	}

	if *searchSHA != "" {
		searchForSHA256(*searchSHA, filterpath)
		return
	}

	if *searchTEXT != "" {
		searchForTEXT(*searchTEXT, filterpath)
		return
	}

	if *dir {
		directory(filterpath, *dirFormat)
		return
	}

	// Mode 5: reports across the ingested collection.
	if *allFileSubset {
		allFilesSubsetReport(filterpath)
		os.Exit(0)
	}

	if *activeSectorSubset {
		activeSectorsSubsetReport(filterpath)
		os.Exit(0)
	}

	if *allSectorSubset {
		allSectorsSubsetReport(filterpath)
		os.Exit(0)
	}

	// A catalog duplicate is a partial file match at a threshold of 1.0.
	if *catDupes {
		allFilesPartialReport(1.0, filterpath, "DUPLICATE CATALOG REPORT")
		os.Exit(0)
	}

	// -min-same takes precedence over -max-diff when both are set; with
	// neither set the -similarity threshold is used.
	if *allFilePartial {
		if *minSame == 0 && *maxDiff == 0 {
			allFilesPartialReport(*similarity, filterpath, "")
		} else if *minSame > 0 {
			allFilesCustomReport(keeperAtLeastNSame, filterpath, fmt.Sprintf("AT LEAST %d FILES MATCH", *minSame))
		} else if *maxDiff > 0 {
			allFilesCustomReport(keeperMaximumNDiff, filterpath, fmt.Sprintf("NO MORE THAN %d FILES DIFFER", *maxDiff))
		}
		os.Exit(0)
	}

	if *allSectorPartial {
		allSectorsPartialReport(*similarity, filterpath)
		os.Exit(0)
	}

	if *activeSectorPartial {
		activeSectorsPartialReport(*similarity, filterpath)
		os.Exit(0)
	}

	if *fileDupes {
		fileDupeReport(filterpath)
		os.Exit(0)
	}

	if *wholeDupes {
		if *quarantine {
			quarantineWholeDisks(filterpath)
		} else {
			wholeDupeReport(filterpath)
		}
		os.Exit(0)
	}

	if *activeDupes {
		if *quarantine {
			quarantineActiveDisks(filterpath)
		} else {
			activeDupeReport(filterpath)
		}
		os.Exit(0)
	}

	// Make sure the fingerprint datastore directory exists before ingesting.
	// NOTE(review): the error from os.MkdirAll is not checked.
	_, e := os.Stat(*baseName)
	if e != nil {
		loggy.Get(0).Logf("Creating path %s", *baseName)
		os.MkdirAll(*baseName, 0755)
	}

	// Mode 6: nothing to ingest or query, so fall back to the shell.
	if *dskName == "" && *dskInfo == "" {

		var dsk *disk.DSKWrapper
		var err error
		if len(filterpath) > 0 {
			fmt.Printf("Trying to load %s\n", filterpath[0])
			dsk, err = disk.NewDSKWrapper(defNibbler, filterpath[0])
			if err != nil {
				fmt.Println("Error: " + err.Error())
				os.Exit(1)
			}
		}
		shellDo(dsk)
		os.Exit(0)

	}

	// Mode 7: ingest a directory tree or a single disk image.
	// NOTE(review): when only -query is given, *dskName is empty here and the
	// Stat below is called with an empty path — confirm intended behaviour.
	info, err := os.Stat(*dskName)
	if err != nil {
		loggy.Get(0).Errorf("Error stating file: %s", err.Error())
		os.Exit(2)
	}
	if info.IsDir() {
		walk(*dskName)
	} else {
		indisk = make(map[disk.DiskFormat]int)
		outdisk = make(map[disk.DiskFormat]int)

		// Analyze inside panic.Do so that a panic while processing the
		// volume is logged with a stack trace instead of crashing.
		panic.Do(
			func() {
				dsk, e := analyze(0, *dskName)
				// Optional per-disk report on the freshly analyzed disk.
				if e == nil && *asPartial {
					asPartialReport(dsk, *similarity, *reportFile, filterpath)
				} else if e == nil && *filePartial {
					filePartialReport(dsk, *similarity, *reportFile, filterpath)
				} else if e == nil && *fileMatch != "" {
					fileMatchReport(dsk, *fileMatch, filterpath)
				} else if e == nil && *dir {
					// NOTE(review): main already returns earlier whenever
					// *dir is set, so this branch looks unreachable.
					info := dsk.GetDirectory(*dirFormat)
					fmt.Printf("Directory of %s:\n\n", dsk.Filename)
					fmt.Println(info)
				}
			},
			func(r interface{}) {
				loggy.Get(0).Errorf("Error processing volume: %s", *dskName)
				loggy.Get(0).Errorf(string(debug.Stack()))
			},
		)
	}
}
DuplicateSource{Fullpath: fullpath, Filename: filename, fingerprint: fgp}) 41 | 42 | dfc.data[checksum] = list 43 | 44 | } 45 | 46 | func (dfc *DuplicateWholeDiskCollection) Add(checksum string, fullpath string, fgp string) { 47 | 48 | if dfc.data == nil { 49 | dfc.data = make(map[string][]DuplicateSource) 50 | } 51 | 52 | list, ok := dfc.data[checksum] 53 | if !ok { 54 | list = make([]DuplicateSource, 0) 55 | } 56 | 57 | list = append(list, DuplicateSource{Fullpath: fullpath, fingerprint: fgp}) 58 | 59 | dfc.data[checksum] = list 60 | 61 | } 62 | 63 | func (dfc *DuplicateActiveSectorDiskCollection) Add(checksum string, achecksum string, fullpath string, fgp string) { 64 | 65 | if dfc.data == nil { 66 | dfc.data = make(map[string][]DuplicateSource) 67 | } 68 | 69 | list, ok := dfc.data[achecksum] 70 | if !ok { 71 | list = make([]DuplicateSource, 0) 72 | } 73 | 74 | list = append(list, DuplicateSource{Fullpath: fullpath, GSHA: checksum, fingerprint: fgp}) 75 | 76 | dfc.data[achecksum] = list 77 | 78 | } 79 | 80 | func (dfc *DuplicateFileCollection) Report(filename string) { 81 | 82 | var w *os.File 83 | var err error 84 | 85 | if filename != "" { 86 | w, err = os.Create(filename) 87 | if err != nil { 88 | return 89 | } 90 | defer w.Close() 91 | } else { 92 | w = os.Stdout 93 | } 94 | 95 | for sha256, list := range dfc.data { 96 | 97 | if len(list) > 1 { 98 | 99 | w.WriteString(fmt.Sprintf("\nChecksum %s duplicated %d times:\n", sha256, len(list))) 100 | for i, v := range list { 101 | w.WriteString(fmt.Sprintf(" %d) %s >> %s\n", i, v.Fullpath, v.Filename)) 102 | } 103 | 104 | } 105 | 106 | } 107 | 108 | } 109 | 110 | func AggregateDuplicateFiles(d *Disk, collection interface{}) { 111 | 112 | for _, f := range d.Files { 113 | 114 | collection.(*DuplicateFileCollection).Add(f.SHA256, d.FullPath, f.Filename, d.source) 115 | 116 | } 117 | 118 | } 119 | 120 | func AggregateDuplicateWholeDisks(d *Disk, collection interface{}) { 121 | 122 | 
collection.(*DuplicateWholeDiskCollection).Add(d.SHA256, d.FullPath, d.source) 123 | 124 | } 125 | 126 | func AggregateDuplicateActiveSectorDisks(d *Disk, collection interface{}) { 127 | 128 | collection.(*DuplicateActiveSectorDiskCollection).Add(d.SHA256, d.SHA256Active, d.FullPath, d.source) 129 | 130 | } 131 | 132 | func (dfc *DuplicateWholeDiskCollection) Report(filename string) { 133 | 134 | var disksWithDupes int 135 | var extras int 136 | 137 | var w *os.File 138 | var err error 139 | 140 | if filename != "" { 141 | w, err = os.Create(filename) 142 | if err != nil { 143 | return 144 | } 145 | defer w.Close() 146 | } else { 147 | w = os.Stdout 148 | } 149 | 150 | for sha256, list := range dfc.data { 151 | 152 | if len(list) > 1 { 153 | 154 | disksWithDupes++ 155 | 156 | original := list[0] 157 | dupes := list[1:] 158 | 159 | w.WriteString("\n") 160 | w.WriteString(fmt.Sprintf("Volume %s has %d duplicate(s):\n", original.Fullpath, len(dupes))) 161 | for _, v := range dupes { 162 | w.WriteString(fmt.Sprintf(" %s (sha256: %s)\n", v.Fullpath, sha256)) 163 | extras++ 164 | } 165 | 166 | } 167 | 168 | } 169 | 170 | w.WriteString("\n") 171 | w.WriteString("SUMMARY\n") 172 | w.WriteString("=======\n") 173 | w.WriteString(fmt.Sprintf("Total disks which have duplicates: %d\n", disksWithDupes)) 174 | w.WriteString(fmt.Sprintf("Total redundant copies found : %d\n", extras)) 175 | 176 | } 177 | 178 | func (dfc *DuplicateActiveSectorDiskCollection) Report(filename string) { 179 | 180 | var disksWithDupes int 181 | var extras int 182 | 183 | var w *os.File 184 | var err error 185 | 186 | if filename != "" { 187 | w, err = os.Create(filename) 188 | if err != nil { 189 | return 190 | } 191 | defer w.Close() 192 | } else { 193 | w = os.Stdout 194 | } 195 | 196 | for sha256, list := range dfc.data { 197 | 198 | if len(list) > 1 { 199 | 200 | m := make(map[string]int) 201 | for _, v := range list { 202 | m[v.GSHA] = 1 203 | } 204 | 205 | if len(m) == 1 { 206 | continue 207 | } 
208 | 209 | disksWithDupes++ 210 | 211 | original := list[0] 212 | dupes := list[1:] 213 | 214 | w.WriteString("\n") 215 | w.WriteString("--------------------------------------\n") 216 | w.WriteString(fmt.Sprintf("Volume : %s\n", original.Fullpath)) 217 | w.WriteString(fmt.Sprintf("Active SHA256: %s\n", sha256)) 218 | w.WriteString(fmt.Sprintf("Global SHA256: %s\n", original.GSHA)) 219 | w.WriteString(fmt.Sprintf("# Duplicates : %d\n", len(dupes))) 220 | for i, v := range dupes { 221 | w.WriteString("\n") 222 | w.WriteString(fmt.Sprintf(" Duplicate #%d\n", i+1)) 223 | w.WriteString(fmt.Sprintf(" = Volume : %s\n", v.Fullpath)) 224 | w.WriteString(fmt.Sprintf(" = Active SHA256: %s\n", sha256)) 225 | w.WriteString(fmt.Sprintf(" = Global SHA256: %s\n", v.GSHA)) 226 | extras++ 227 | } 228 | w.WriteString("\n") 229 | 230 | } 231 | 232 | } 233 | 234 | w.WriteString("\n") 235 | w.WriteString("SUMMARY\n") 236 | w.WriteString("=======\n") 237 | w.WriteString(fmt.Sprintf("Total disks which have duplicates: %d\n", disksWithDupes)) 238 | w.WriteString(fmt.Sprintf("Total redundant copies found : %d\n", extras)) 239 | 240 | } 241 | 242 | func asPartialReport(d *Disk, t float64, filename string, pathfilter []string) { 243 | matches := d.GetPartialMatchesWithThreshold(t, pathfilter) 244 | 245 | var w *os.File 246 | var err error 247 | 248 | if filename != "" { 249 | w, err = os.Create(filename) 250 | if err != nil { 251 | return 252 | } 253 | defer w.Close() 254 | } else { 255 | w = os.Stdout 256 | } 257 | 258 | w.WriteString(fmt.Sprintf("PARTIAL ACTIVE SECTOR MATCH REPORT FOR %s (Above %.2f%%)\n\n", d.Filename, 100*t)) 259 | 260 | //sort.Sort(ByMatchFactor(matches)) 261 | sort.Sort(ByMatchFactor(matches)) 262 | 263 | w.WriteString(fmt.Sprintf("%d matches found\n\n", len(matches))) 264 | for i := len(matches) - 1; i >= 0; i-- { 265 | v := matches[i] 266 | 267 | w.WriteString(fmt.Sprintf("%.2f%%\t%s\n", v.MatchFactor*100, v.FullPath)) 268 | 269 | } 270 | 271 | w.WriteString("") 272 
| } 273 | 274 | func filePartialReport(d *Disk, t float64, filename string, pathfilter []string) { 275 | matches := d.GetPartialFileMatchesWithThreshold(t, pathfilter) 276 | 277 | var w *os.File 278 | var err error 279 | 280 | if filename != "" { 281 | w, err = os.Create(filename) 282 | if err != nil { 283 | return 284 | } 285 | defer w.Close() 286 | } else { 287 | w = os.Stdout 288 | } 289 | 290 | w.WriteString(fmt.Sprintf("PARTIAL FILE MATCH REPORT FOR %s (Above %.2f%%)\n\n", d.Filename, 100*t)) 291 | 292 | //sort.Sort(ByMatchFactor(matches)) 293 | sort.Sort(ByMatchFactor(matches)) 294 | 295 | w.WriteString(fmt.Sprintf("%d matches found\n\n", len(matches))) 296 | for i := len(matches) - 1; i >= 0; i-- { 297 | v := matches[i] 298 | 299 | w.WriteString(fmt.Sprintf("%.2f%%\t%s (%d missing, %d extras)\n", v.MatchFactor*100, v.FullPath, len(v.MissingFiles), len(v.ExtraFiles))) 300 | for f1, f2 := range v.MatchFiles { 301 | w.WriteString(fmt.Sprintf("\t == %s -> %s\n", f1.Filename, f2.Filename)) 302 | } 303 | for _, f := range v.MissingFiles { 304 | w.WriteString(fmt.Sprintf("\t -- %s\n", f.Filename)) 305 | } 306 | for _, f := range v.ExtraFiles { 307 | w.WriteString(fmt.Sprintf("\t ++ %s\n", f.Filename)) 308 | } 309 | w.WriteString("") 310 | 311 | } 312 | 313 | w.WriteString("") 314 | } 315 | 316 | func fileMatchReport(d *Disk, filename string, pathfilter []string) { 317 | 318 | matches := d.GetFileMatches(filename, pathfilter) 319 | 320 | var w *os.File 321 | var err error 322 | 323 | if filename != "" { 324 | w, err = os.Create(filename) 325 | if err != nil { 326 | return 327 | } 328 | defer w.Close() 329 | } else { 330 | w = os.Stdout 331 | } 332 | 333 | w.WriteString(fmt.Sprintf("PARTIAL FILE MATCH REPORT FOR %s (File: %s)\n\n", d.Filename, filename)) 334 | 335 | w.WriteString(fmt.Sprintf("%d matches found\n\n", len(matches))) 336 | for i, v := range matches { 337 | 338 | w.WriteString(fmt.Sprintf("%d)\t%s\n", i, v.FullPath)) 339 | for f1, f2 := range v.MatchFiles 
{ 340 | w.WriteString(fmt.Sprintf("\t == %s -> %s\n", f1.Filename, f2.Filename)) 341 | } 342 | w.WriteString("") 343 | 344 | } 345 | 346 | w.WriteString("") 347 | } 348 | 349 | func fileDupeReport(filter []string) { 350 | 351 | dfc := &DuplicateFileCollection{} 352 | Aggregate(AggregateDuplicateFiles, dfc, filter) 353 | 354 | fmt.Println("DUPLICATE FILE REPORT") 355 | fmt.Println() 356 | 357 | dfc.Report(*reportFile) 358 | 359 | } 360 | 361 | func wholeDupeReport(filter []string) { 362 | 363 | dfc := &DuplicateWholeDiskCollection{} 364 | Aggregate(AggregateDuplicateWholeDisks, dfc, filter) 365 | 366 | fmt.Println("DUPLICATE WHOLE DISK REPORT") 367 | fmt.Println() 368 | 369 | dfc.Report(*reportFile) 370 | 371 | } 372 | 373 | func activeDupeReport(filter []string) { 374 | 375 | dfc := &DuplicateActiveSectorDiskCollection{} 376 | Aggregate(AggregateDuplicateActiveSectorDisks, dfc, filter) 377 | 378 | fmt.Println("DUPLICATE ACTIVE SECTORS DISK REPORT") 379 | fmt.Println() 380 | 381 | dfc.Report(*reportFile) 382 | 383 | } 384 | 385 | func allFilesPartialReport(t float64, filter []string, oheading string) { 386 | 387 | matches := CollectFilesOverlapsAboveThreshold(t, filter) 388 | 389 | if *csvOut { 390 | dumpFileOverlapCSV(matches, *reportFile) 391 | return 392 | } 393 | 394 | if oheading != "" { 395 | fmt.Println(oheading + "\n") 396 | } else { 397 | fmt.Printf("PARTIAL ALL FILE MATCH REPORT (Above %.2f%%)\n\n", 100*t) 398 | } 399 | 400 | fmt.Printf("%d matches found\n\n", len(matches)) 401 | for volumename, matchdata := range matches { 402 | 403 | fmt.Printf("Disk: %s\n", volumename) 404 | 405 | for k, ratio := range matchdata.percent { 406 | fmt.Println() 407 | fmt.Printf(" :: %.2f%% Match to %s\n", 100*ratio, k) 408 | for f1, f2 := range matchdata.files[k] { 409 | fmt.Printf(" == %s -> %s\n", f1.Filename, f2.Filename) 410 | } 411 | for _, f := range matchdata.missing[k] { 412 | fmt.Printf(" -- %s\n", f.Filename) 413 | } 414 | for _, f := range matchdata.extras[k] { 
415 | fmt.Printf(" ++ %s\n", f.Filename) 416 | } 417 | fmt.Println() 418 | } 419 | 420 | fmt.Println() 421 | 422 | } 423 | 424 | fmt.Println() 425 | } 426 | 427 | func allSectorsPartialReport(t float64, filter []string) { 428 | 429 | matches := CollectSectorOverlapsAboveThreshold(t, filter, GetAllDiskSectors) 430 | 431 | if *csvOut { 432 | dumpSectorOverlapCSV(matches, *reportFile) 433 | return 434 | } 435 | 436 | fmt.Printf("NON-ZERO SECTOR MATCH REPORT (Above %.2f%%)\n\n", 100*t) 437 | 438 | fmt.Printf("%d matches found\n\n", len(matches)) 439 | for volumename, matchdata := range matches { 440 | 441 | fmt.Printf("Disk: %s\n", volumename) 442 | 443 | for k, ratio := range matchdata.percent { 444 | fmt.Println() 445 | fmt.Printf(" :: %.2f%% Match to %s\n", 100*ratio, k) 446 | fmt.Printf(" == %d Sectors matched\n", len(matchdata.same[k])) 447 | fmt.Printf(" -- %d Sectors missing\n", len(matchdata.missing[k])) 448 | fmt.Printf(" ++ %d Sectors extra\n", len(matchdata.extras[k])) 449 | fmt.Println() 450 | } 451 | 452 | fmt.Println() 453 | 454 | } 455 | 456 | fmt.Println() 457 | } 458 | 459 | func activeSectorsPartialReport(t float64, filter []string) { 460 | 461 | matches := CollectSectorOverlapsAboveThreshold(t, filter, GetActiveDiskSectors) 462 | 463 | if *csvOut { 464 | dumpSectorOverlapCSV(matches, *reportFile) 465 | return 466 | } 467 | 468 | fmt.Printf("PARTIAL ACTIVE SECTOR MATCH REPORT (Above %.2f%%)\n\n", 100*t) 469 | 470 | fmt.Printf("%d matches found\n\n", len(matches)) 471 | for volumename, matchdata := range matches { 472 | 473 | fmt.Printf("Disk: %s\n", volumename) 474 | 475 | for k, ratio := range matchdata.percent { 476 | fmt.Println() 477 | fmt.Printf(" :: %.2f%% Match to %s\n", 100*ratio, k) 478 | fmt.Printf(" == %d Sectors matched\n", len(matchdata.same[k])) 479 | fmt.Printf(" -- %d Sectors missing\n", len(matchdata.missing[k])) 480 | fmt.Printf(" ++ %d Sectors extra\n", len(matchdata.extras[k])) 481 | fmt.Println() 482 | } 483 | 484 | fmt.Println() 
485 | 486 | } 487 | 488 | fmt.Println() 489 | } 490 | 491 | func allFilesSubsetReport(filter []string) { 492 | 493 | matches := CollectFileSubsets(filter) 494 | 495 | if *csvOut { 496 | dumpFileOverlapCSV(matches, *reportFile) 497 | return 498 | } 499 | 500 | fmt.Printf("SUBSET DISK FILE MATCH REPORT\n\n") 501 | 502 | fmt.Printf("%d matches found\n\n", len(matches)) 503 | for volumename, matchdata := range matches { 504 | 505 | fmt.Printf("Disk: %s\n", volumename) 506 | 507 | for k, _ := range matchdata.percent { 508 | fmt.Println() 509 | fmt.Printf(" :: Is a file subset of %s\n", k) 510 | for f1, f2 := range matchdata.files[k] { 511 | fmt.Printf(" == %s -> %s\n", f1.Filename, f2.Filename) 512 | } 513 | for _, f := range matchdata.missing[k] { 514 | fmt.Printf(" -- %s\n", f.Filename) 515 | } 516 | for _, f := range matchdata.extras[k] { 517 | fmt.Printf(" ++ %s\n", f.Filename) 518 | } 519 | fmt.Println() 520 | } 521 | 522 | fmt.Println() 523 | 524 | } 525 | 526 | fmt.Println() 527 | } 528 | 529 | func activeSectorsSubsetReport(filter []string) { 530 | 531 | matches := CollectSectorSubsets(filter, GetActiveDiskSectors) 532 | 533 | if *csvOut { 534 | dumpSectorOverlapCSV(matches, *reportFile) 535 | return 536 | } 537 | 538 | fmt.Printf("ACTIVE SECTOR SUBSET MATCH REPORT\n\n") 539 | 540 | fmt.Printf("%d matches found\n\n", len(matches)) 541 | for volumename, matchdata := range matches { 542 | 543 | fmt.Printf("Disk: %s\n", volumename) 544 | 545 | for k, _ := range matchdata.percent { 546 | fmt.Println() 547 | fmt.Printf(" :: Is a subset (based on active sectors) of %s\n", k) 548 | fmt.Printf(" == %d Sectors matched\n", len(matchdata.same[k])) 549 | fmt.Printf(" ++ %d Sectors extra\n", len(matchdata.extras[k])) 550 | fmt.Println() 551 | } 552 | 553 | fmt.Println() 554 | 555 | } 556 | 557 | fmt.Println() 558 | } 559 | 560 | func allSectorsSubsetReport(filter []string) { 561 | 562 | matches := CollectSectorSubsets(filter, GetAllDiskSectors) 563 | 564 | if *csvOut { 565 | 
dumpSectorOverlapCSV(matches, *reportFile) 566 | return 567 | } 568 | 569 | fmt.Printf("NON-ZERO SECTOR SUBSET MATCH REPORT\n\n") 570 | 571 | fmt.Printf("%d matches found\n\n", len(matches)) 572 | for volumename, matchdata := range matches { 573 | 574 | fmt.Printf("Disk: %s\n", volumename) 575 | 576 | for k, _ := range matchdata.percent { 577 | fmt.Println() 578 | fmt.Printf(" :: Is a subset (based on active sectors) of %s\n", k) 579 | fmt.Printf(" == %d Sectors matched\n", len(matchdata.same[k])) 580 | fmt.Printf(" ++ %d Sectors extra\n", len(matchdata.extras[k])) 581 | fmt.Println() 582 | } 583 | 584 | fmt.Println() 585 | 586 | } 587 | 588 | fmt.Println() 589 | } 590 | 591 | func dumpFileOverlapCSV(matches map[string]*FileOverlapRecord, filename string) { 592 | 593 | var w *os.File 594 | var err error 595 | 596 | if filename != "" { 597 | w, err = os.Create(filename) 598 | if err != nil { 599 | return 600 | } 601 | defer w.Close() 602 | } else { 603 | w = os.Stderr 604 | } 605 | 606 | w.WriteString("MATCH,DISK1,FILENAME1,DISK2,FILENAME2,EXISTS\n") 607 | for disk1, matchdata := range matches { 608 | for disk2, match := range matchdata.percent { 609 | for f1, f2 := range matchdata.files[disk2] { 610 | w.WriteString(fmt.Sprintf(`%.2f,"%s","%s","%s","%s",%s`, match, disk1, f1.Filename, disk2, f2.Filename, "Y") + "\n") 611 | } 612 | for _, f1 := range matchdata.missing[disk2] { 613 | w.WriteString(fmt.Sprintf(`%.2f,"%s","%s","%s","%s",%s`, match, disk1, f1.Filename, disk2, "", "N") + "\n") 614 | } 615 | for _, f2 := range matchdata.extras[disk2] { 616 | w.WriteString(fmt.Sprintf(`%.2f,"%s","%s","%s","%s",%s`, match, disk1, "", disk2, f2.Filename, "N") + "\n") 617 | } 618 | } 619 | } 620 | 621 | if filename != "" { 622 | fmt.Println("\nWrote " + filename + "\n") 623 | } 624 | 625 | } 626 | 627 | func dumpSectorOverlapCSV(matches map[string]*SectorOverlapRecord, filename string) { 628 | 629 | var w *os.File 630 | var err error 631 | 632 | if filename != "" { 633 | w, 
err = os.Create(filename) 634 | if err != nil { 635 | return 636 | } 637 | defer w.Close() 638 | } else { 639 | w = os.Stderr 640 | } 641 | 642 | w.WriteString("MATCH,DISK1,DISK2,SAME,MISSING,EXTRA\n") 643 | for disk1, matchdata := range matches { 644 | for disk2, match := range matchdata.percent { 645 | w.WriteString(fmt.Sprintf(`%.2f,"%s","%s",%d,%d,%d`, match, disk1, disk2, len(matchdata.same[disk2]), len(matchdata.missing[disk2]), len(matchdata.extras[disk2])) + "\n") 646 | } 647 | } 648 | 649 | if filename != "" { 650 | fmt.Println("\nWrote " + filename + "\n") 651 | } 652 | 653 | } 654 | 655 | func keeperAtLeastNSame(d1, d2 string, v *FileOverlapRecord) bool { 656 | 657 | return len(v.files[d2]) >= *minSame 658 | 659 | } 660 | 661 | func keeperMaximumNDiff(d1, d2 string, v *FileOverlapRecord) bool { 662 | 663 | return len(v.files[d2]) > 0 && (len(v.missing[d2])+len(v.extras[d2])) <= *maxDiff 664 | 665 | } 666 | 667 | func allFilesCustomReport(keep func(d1, d2 string, v *FileOverlapRecord) bool, filter []string, oheading string) { 668 | 669 | matches := CollectFilesOverlapsCustom(keep, filter) 670 | 671 | if *csvOut { 672 | dumpFileOverlapCSV(matches, *reportFile) 673 | return 674 | } 675 | 676 | fmt.Println(oheading + "\n") 677 | 678 | fmt.Printf("%d matches found\n\n", len(matches)) 679 | for volumename, matchdata := range matches { 680 | 681 | fmt.Printf("Disk: %s\n", volumename) 682 | 683 | for k, ratio := range matchdata.percent { 684 | fmt.Println() 685 | fmt.Printf(" :: %.2f%% Match to %s\n", 100*ratio, k) 686 | for f1, f2 := range matchdata.files[k] { 687 | fmt.Printf(" == %s -> %s\n", f1.Filename, f2.Filename) 688 | } 689 | for _, f := range matchdata.missing[k] { 690 | fmt.Printf(" -- %s\n", f.Filename) 691 | } 692 | for _, f := range matchdata.extras[k] { 693 | fmt.Printf(" ++ %s\n", f.Filename) 694 | } 695 | fmt.Println() 696 | } 697 | 698 | fmt.Println() 699 | 700 | } 701 | 702 | fmt.Println() 703 | } 704 | 
-------------------------------------------------------------------------------- /disk/atokens.go: -------------------------------------------------------------------------------- 1 | package disk 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | 8 | "regexp" 9 | ) 10 | 11 | //import "strings" 12 | 13 | var ApplesoftTokens = map[int]string{ 14 | 0x80: "END", 15 | 0x81: "FOR", 16 | 0x82: "NEXT", 17 | 0x83: "DATA", 18 | 0x84: "INPUT", 19 | 0x85: "DEL", 20 | 0x86: "DIM", 21 | 0x87: "READ", 22 | 0x88: "GR", 23 | 0x89: "TEXT", 24 | 0x8A: "PR#", 25 | 0x8B: "IN#", 26 | 0x8C: "CALL", 27 | 0x8D: "PLOT", 28 | 0x8E: "HLIN", 29 | 0x8F: "VLIN", 30 | 0x90: "HGR2", 31 | 0x91: "HGR", 32 | 0x92: "HCOLOR=", 33 | 0x93: "HPLOT", 34 | 0x94: "DRAW", 35 | 0x95: "XDRAW", 36 | 0x96: "HTAB", 37 | 0x97: "HOME", 38 | 0x98: "ROT=", 39 | 0x99: "SCALE=", 40 | 0x9A: "SHLOAD", 41 | 0x9B: "TRACE", 42 | 0x9C: "NOTRACE", 43 | 0x9D: "NORMAL", 44 | 0x9E: "INVERSE", 45 | 0x9F: "FLASH", 46 | 0xA0: "COLOR=", 47 | 0xA1: "POP", 48 | 0xA2: "VTAB", 49 | 0xA3: "HIMEM:", 50 | 0xA4: "LOMEM:", 51 | 0xA5: "ONERR", 52 | 0xA6: "RESUME", 53 | 0xA7: "RECALL", 54 | 0xA8: "STORE", 55 | 0xA9: "SPEED=", 56 | 0xAA: "LET", 57 | 0xAB: "GOTO", 58 | 0xAC: "RUN", 59 | 0xAD: "IF", 60 | 0xAE: "RESTORE", 61 | 0xAF: "&", 62 | 0xB0: "GOSUB", 63 | 0xB1: "RETURN", 64 | 0xB2: "REM", 65 | 0xB3: "STOP", 66 | 0xB4: "ON", 67 | 0xB5: "WAIT", 68 | 0xB6: "LOAD", 69 | 0xB7: "SAVE", 70 | 0xB8: "DEF", 71 | 0xB9: "POKE", 72 | 0xBA: "PRINT", 73 | 0xBB: "CONT", 74 | 0xBC: "LIST", 75 | 0xBD: "CLEAR", 76 | 0xBE: "GET", 77 | 0xBF: "NEW", 78 | 0xC0: "TAB(", 79 | 0xC1: "TO", 80 | 0xC2: "FN", 81 | 0xC3: "SPC(", 82 | 0xC4: "THEN", 83 | 0xC5: "AT", 84 | 0xC6: "NOT", 85 | 0xC7: "STEP", 86 | 0xC8: "+", 87 | 0xC9: "-", 88 | 0xCA: "*", 89 | 0xCB: "/", 90 | 0xCC: "^", 91 | 0xCD: "AND", 92 | 0xCE: "OR", 93 | 0xCF: ">", 94 | 0xD0: "=", 95 | 0xD1: "<", 96 | 0xD2: "SGN", 97 | 0xD3: "INT", 98 | 0xD4: "ABS", 99 | 0xD5: "USR", 100 | 0xD6: "FRE", 101 | 0xD7: 
"SCRN(", 102 | 0xD8: "PDL", 103 | 0xD9: "POS", 104 | 0xDA: "SQR", 105 | 0xDB: "RND", 106 | 0xDC: "LOG", 107 | 0xDD: "EXP", 108 | 0xDE: "COS", 109 | 0xDF: "SIN", 110 | 0xE0: "TAN", 111 | 0xE1: "ATN", 112 | 0xE2: "PEEK", 113 | 0xE3: "LEN", 114 | 0xE4: "STR$", 115 | 0xE5: "VAL", 116 | 0xE6: "ASC", 117 | 0xE7: "CHR$", 118 | 0xE8: "LEFT$", 119 | 0xE9: "RIGHT$", 120 | 0xEA: "MID$", 121 | } 122 | 123 | var ASPartials = map[string]bool{ 124 | "COLOR": true, 125 | "HCOLOR": true, 126 | "SPEED": true, 127 | "SCALE": true, 128 | "-": true, 129 | } 130 | 131 | var ApplesoftReverse map[string]int 132 | var IntegerReverse map[string]int 133 | 134 | func init() { 135 | ApplesoftReverse = make(map[string]int) 136 | IntegerReverse = make(map[string]int) 137 | for k, v := range ApplesoftTokens { 138 | ApplesoftReverse[v] = k 139 | if v == "PRINT" { 140 | ApplesoftReverse["?"] = k 141 | } 142 | } 143 | for k, v := range IntegerTokens { 144 | IntegerReverse[v] = k 145 | } 146 | 147 | tst() 148 | 149 | } 150 | 151 | var IntegerTokens = map[int]string{ 152 | 0x00: "HIMEM:", 153 | 0x02: "_", 154 | 0x03: ":", 155 | 0x04: "LOAD", 156 | 0x05: "SAVE", 157 | 0x06: "CON", 158 | 0x07: "RUN", 159 | 0x08: "RUN", 160 | 0x09: "DEL", 161 | 0x0A: ",", 162 | 0x0B: "NEW", 163 | 0x0C: "CLR", 164 | 0x0D: "AUTO", 165 | 0x0E: ",", 166 | 0x0F: "MAN", 167 | 0x10: "HIMEM:", 168 | 0x11: "LOMEM:", 169 | 0x12: "+", 170 | 0x13: "-", 171 | 0x14: "*", 172 | 0x15: "/", 173 | 0x16: "=", 174 | 0x17: "#", 175 | 0x18: ">=", 176 | 0x19: ">", 177 | 0x1A: "<=", 178 | 0x1B: "<>", 179 | 0x1C: "<", 180 | 0x1D: "AND", 181 | 0x1E: "OR", 182 | 0x1F: "MOD", 183 | 0x20: "^", 184 | 0x21: "+", 185 | 0x22: "(", 186 | 0x23: ",", 187 | 0x24: "THEN", 188 | 0x25: "THEN", 189 | 0x26: ",", 190 | 0x27: ",", 191 | 0x28: "\"", 192 | 0x29: "\"", 193 | 0x2A: "(", 194 | 0x2B: "!", 195 | 0x2C: "!", 196 | 0x2D: "(", 197 | 0x2E: "PEEK", 198 | 0x2F: "RND", 199 | 0x30: "SGN", 200 | 0x31: "ABS", 201 | 0x32: "PDL", 202 | 0x33: "RNDX", 203 | 0x34: "(", 
// Read16 returns the little-endian 16-bit value at buffer[*srcptr] and
// advances the cursor: *srcptr grows by 2 and *length shrinks by 2.
//
// If the two bytes are not both inside buffer the function returns 0
// instead of panicking. The cursor is still advanced, so callers looping on
// "*length > 0" terminate on truncated data.
func Read16(srcptr, length *int, buffer []byte) int {

	pos := *srcptr

	*srcptr += 2
	*length -= 2

	if pos < 0 || pos+1 >= len(buffer) {
		return 0
	}

	return int(buffer[pos]) + 256*int(buffer[pos+1])

}

// Read8 returns the byte at buffer[*srcptr] and advances the cursor:
// *srcptr grows by 1 and *length shrinks by 1.
//
// If the position is outside buffer the function returns 0 instead of
// panicking; the cursor is still advanced.
func Read8(srcptr, length *int, buffer []byte) byte {

	pos := *srcptr

	*srcptr += 1
	*length -= 1

	if pos < 0 || pos >= len(buffer) {
		return 0
	}

	return buffer[pos]

}

// StripText returns a copy of b with the high bit of every byte cleared.
// The input slice is left untouched.
func StripText(b []byte) []byte {
	c := make([]byte, len(b))
	for i, v := range b {
		c[i] = v & 127
	}
	return c
}
382 | } 383 | if t == 0xb2 { 384 | inRem = true 385 | } 386 | } else { 387 | /* simple character */ 388 | r := rune(t) 389 | if r == '"' && !inRem { 390 | if !inQuote { 391 | out = append(out, t) 392 | } else { 393 | out = append(out, t) 394 | } 395 | inQuote = !inQuote 396 | } else if r == ':' && !inRem && !inQuote { 397 | out = append(out, t) 398 | } else if inRem && (r == '\r' || r == '\n') { 399 | out = append(out, []byte("*")...) 400 | } else { 401 | out = append(out, t) 402 | } 403 | } 404 | 405 | // Advance 406 | t = Read8(&srcptr, &length, data) 407 | } 408 | 409 | out = append(out, []byte("\n")...) 410 | 411 | // log.Printf("Line bytes: %+v", out[lineStart:]) 412 | 413 | inQuote, inRem = false, false 414 | 415 | if length == 0 { 416 | break 417 | } 418 | 419 | } 420 | 421 | //fmt.Println(string(out)) 422 | 423 | return out 424 | 425 | } 426 | 427 | func IntegerDetoks(data []byte) []byte { 428 | 429 | var srcptr int = 0x00 430 | var length int = len(data) 431 | var out []byte = make([]byte, 0) 432 | 433 | if length < 2 { 434 | // not enough here 435 | return []byte("\r\n") 436 | } 437 | 438 | for length > 0 { 439 | 440 | // starting state for line 441 | var lineLen byte 442 | var lineNum int 443 | var trailingSpace bool 444 | var newTrailingSpace bool = false 445 | 446 | // var lineStart = len(out) 447 | 448 | // read the line length 449 | lineLen = Read8(&srcptr, &length, data) 450 | 451 | if lineLen == 0 { 452 | break // zero length line found 453 | } 454 | 455 | // read line number 456 | lineNum = Read16(&srcptr, &length, data) 457 | out = append(out, []byte(fmt.Sprintf("%d ", lineNum))...) 458 | 459 | // now process line 460 | var t byte 461 | t = Read8(&srcptr, &length, data) 462 | for t != 0x01 && length > 0 { 463 | if t == 0x03 { 464 | out = append(out, []byte(" :")...) 
465 | t = Read8(&srcptr, &length, data) 466 | } else if t == 0x28 { 467 | /* start of quoted text */ 468 | out = append(out, 34) 469 | 470 | t = Read8(&srcptr, &length, data) 471 | for t != 0x29 && length > 0 { 472 | out = append(out, t&0x7f) 473 | t = Read8(&srcptr, &length, data) 474 | } 475 | if t != 0x29 { 476 | break 477 | } 478 | 479 | out = append(out, 34) 480 | 481 | t = Read8(&srcptr, &length, data) 482 | } else if t == 0x5d { 483 | /* start of REM statement, run to EOL */ 484 | if trailingSpace { 485 | out = append(out, 32) 486 | } 487 | out = append(out, []byte("REM ")...) 488 | 489 | t = Read8(&srcptr, &length, data) 490 | for t != 0x01 && length > 0 { 491 | out = append(out, t&0x7f) 492 | t = Read8(&srcptr, &length, data) 493 | } 494 | if t != 0x01 { 495 | break 496 | } 497 | } else if t >= 0xb0 && t <= 0xb9 { 498 | /* start of integer constant */ 499 | if length < 2 { 500 | break 501 | } 502 | val := Read16(&srcptr, &length, data) 503 | out = append(out, []byte(fmt.Sprintf("%d", val))...) 504 | t = Read8(&srcptr, &length, data) 505 | } else if t >= 0xc1 && t <= 0xda { 506 | /* start of variable name */ 507 | for (t >= 0xc1 && t <= 0xda) || (t >= 0xb0 && t <= 0xb9) { 508 | /* note no RTF-escaped chars in this range */ 509 | out = append(out, t&0x7f) 510 | t = Read8(&srcptr, &length, data) 511 | } 512 | } else if t < 0x80 { 513 | /* found a token; try to get the whitespace right */ 514 | /* (maybe should've left whitespace on the ends of tokens 515 | that are always followed by whitespace...?) */ 516 | token, _ := IntegerTokens[int(t)] 517 | if token[0] >= 0x21 && token[0] <= 0x3f || t < 0x12 { 518 | /* does not need leading space */ 519 | out = append(out, []byte(token)...) 520 | } else { 521 | /* needs leading space; combine with prev if it exists */ 522 | if trailingSpace { 523 | out = append(out, []byte(token)...) 524 | } else { 525 | out = append(out, []byte(" "+token)...) 
526 | } 527 | out = append(out, 32) 528 | } 529 | if token[len(token)-1] == 32 { 530 | newTrailingSpace = true 531 | } 532 | t = Read8(&srcptr, &length, data) 533 | } else { 534 | /* should not happen */ 535 | t = Read8(&srcptr, &length, data) 536 | } 537 | 538 | trailingSpace = newTrailingSpace 539 | newTrailingSpace = false 540 | } 541 | 542 | if t != 0x01 && length > 0 { 543 | break // must have failed 544 | } 545 | 546 | // ok, new line 547 | out = append(out, []byte("\r\n")...) 548 | 549 | } 550 | 551 | return out 552 | 553 | } 554 | 555 | func breakingChar(ch rune) bool { 556 | return ch == '(' || ch == ')' || ch == '.' || ch == ',' || ch == ';' || ch == ':' || ch == ' ' 557 | } 558 | 559 | func ApplesoftTokenize(lines []string) []byte { 560 | 561 | start := 0x801 562 | currAddr := start 563 | 564 | buffer := make([]byte, 0) 565 | 566 | for _, l := range lines { 567 | 568 | l = strings.Trim(l, " \r\n\t") 569 | if l == "" { 570 | continue 571 | } 572 | 573 | chunk := "" 574 | inqq := false 575 | tmp := strings.SplitN(l, " ", 2) 576 | var rest string 577 | var ln int 578 | if len(tmp) == 1 { 579 | ln = 0 580 | rest = "" 581 | } else { 582 | ln, _ = strconv.Atoi(tmp[0]) 583 | rest = strings.Trim(tmp[1], " ") 584 | } 585 | // lastIsTok := false 586 | 587 | linebuffer := make([]byte, 4) 588 | 589 | // LINE NUMBER 590 | linebuffer[0x02] = byte(ln & 0xff) 591 | linebuffer[0x03] = byte(ln / 0x100) 592 | 593 | // PROCESS LINE 594 | var lastKeyword string 595 | var inREM bool 596 | 597 | for _, ch := range rest { 598 | 599 | // case for a single character token, not in string 600 | if codech, ok := ApplesoftReverse[strings.ToUpper(string(ch))]; ok && !inREM && !inqq { 601 | 602 | // log.Printf("'%s' (%.2x) is a token... (lastKW=%s, chunk=%s)", string(ch), ch, lastKeyword, chunk) 603 | 604 | // 1st - is chunk + string a token? 
605 | if chunk != "" { 606 | code, ok := ApplesoftReverse[strings.ToUpper(chunk+string(ch))] 607 | if ok { 608 | if strings.ToUpper(chunk+string(ch)) == "REM" || strings.ToUpper(chunk+string(ch)) == "DATA" { 609 | inREM = true 610 | } 611 | linebuffer = append(linebuffer, byte(code)) 612 | lastKeyword = chunk + string(ch) 613 | chunk = "" 614 | // lastIsTok = true 615 | continue // we absorbed it ... eg. COLOR= 616 | } else { 617 | // chunk wasn't so treat as a string 618 | // log.Printf("output (%s) by itself...", chunk) 619 | linebuffer = append(linebuffer, []byte(chunk)...) 620 | // lastIsTok = false 621 | // lastKeyword = chunk 622 | chunk = "" 623 | } 624 | } 625 | 626 | // just the symbol 627 | linebuffer = append(linebuffer, byte(codech)) 628 | lastKeyword = string(ch) 629 | chunk = "" 630 | // lastIsTok = true 631 | continue 632 | } 633 | 634 | switch { 635 | case ch < 32 || ch > 127: 636 | continue 637 | case inREM && ch != ':': 638 | chunk += string(ch) 639 | continue 640 | case inREM && ch == ':': 641 | if chunk != "" { 642 | linebuffer = append(linebuffer, []byte(chunk)...) 643 | } 644 | chunk = "" 645 | linebuffer = append(linebuffer, byte(ch)) 646 | inREM = false 647 | continue 648 | case inqq && ch != '"': 649 | linebuffer = append(linebuffer, byte(ch)) 650 | lastKeyword = "" 651 | // lastIsTok = false 652 | continue 653 | case ch == '"': 654 | linebuffer = append(linebuffer, byte(ch)) 655 | lastKeyword = "" 656 | inqq = !inqq 657 | // lastIsTok = false 658 | continue 659 | case !inqq && breakingChar(ch) && !ASPartials[chunk]: 660 | 661 | if chunk != "" { 662 | code, ok := ApplesoftReverse[strings.ToUpper(chunk+string(ch))] 663 | if ok { 664 | linebuffer = append(linebuffer, byte(code)) 665 | lastKeyword = strings.ToUpper(chunk + string(ch)) 666 | chunk = "" 667 | continue 668 | } 669 | } 670 | 671 | linebuffer = append(linebuffer, []byte(chunk)...) 
672 | chunk = "" 673 | if ch != ' ' { 674 | linebuffer = append(linebuffer, byte(ch)) 675 | } 676 | lastKeyword = "" 677 | continue 678 | } 679 | 680 | if ch != ' ' { 681 | chunk += string(ch) 682 | } 683 | 684 | if lastKeyword != "" { 685 | code, ok := ApplesoftReverse[strings.ToUpper(lastKeyword+chunk)] 686 | if ok { 687 | if strings.ToUpper(lastKeyword+chunk) == "REM" || strings.ToUpper(lastKeyword+chunk) == "DATA" { 688 | inREM = true 689 | } 690 | linebuffer[len(linebuffer)-1] = byte(code) 691 | lastKeyword = lastKeyword + chunk 692 | chunk = "" 693 | continue 694 | } 695 | } 696 | 697 | code, ok := ApplesoftReverse[strings.ToUpper(chunk)] 698 | if ok { 699 | if strings.ToUpper(lastKeyword+chunk) == "REM" || strings.ToUpper(lastKeyword+chunk) == "DATA" { 700 | inREM = true 701 | } 702 | linebuffer = append(linebuffer, byte(code)) 703 | lastKeyword = chunk 704 | chunk = "" 705 | } 706 | } 707 | if chunk != "" { 708 | linebuffer = append(linebuffer, []byte(chunk)...) 709 | } 710 | 711 | //~ for i := 5; i < len(linebuffer)-1; i++ { 712 | //~ if linebuffer[i] == 0xc9 { 713 | //~ log.Printf("Found 0xC9 (-) in linebuffer...") 714 | //~ // minus token... 715 | //~ before := rune(linebuffer[i-1]) 716 | //~ after := rune(linebuffer[i+1]) 717 | //~ log.Printf("Before = %s, After = %s", string(before), string(after)) 718 | 719 | //~ if after == '.' || (after >= '0' && after <= '9') { 720 | //~ // number part 721 | //~ if before > 128 || before == ',' || before == 0xD0 { 722 | //~ log.Printf("changing - token at %d to symbol", i) 723 | //~ linebuffer[i] = byte('-') 724 | //~ } 725 | //~ } 726 | //~ } 727 | //~ } 728 | 729 | // ENDING ZERO BYTE 730 | linebuffer = append(linebuffer, 0x00) 731 | 732 | nextAddr := currAddr + len(linebuffer) 733 | linebuffer[0x00] = byte(nextAddr & 0xff) 734 | linebuffer[0x01] = byte(nextAddr / 0x100) 735 | currAddr = nextAddr 736 | 737 | buffer = append(buffer, linebuffer...) 
738 | } 739 | 740 | buffer = append(buffer, 0x00, 0x00) 741 | 742 | return buffer 743 | 744 | } 745 | 746 | var reInt = regexp.MustCompile("^(-?[0-9]+)$") 747 | 748 | func isInt(s string) (bool, [3]byte) { 749 | if reInt.MatchString(s) { 750 | 751 | m := reInt.FindAllStringSubmatch(s, -1) 752 | i, _ := strconv.ParseInt(m[0][1], 10, 32) 753 | return true, [3]byte{0xb9, byte(i % 256), byte(i / 256)} 754 | 755 | } else { 756 | return false, [3]byte{0x00, 0x00, 0x00} 757 | } 758 | } 759 | 760 | func IntegerTokenize(lines []string) []byte { 761 | 762 | start := 0x801 763 | currAddr := start 764 | 765 | buffer := make([]byte, 0) 766 | 767 | var linebuffer []byte 768 | 769 | add := func(chunk string) { 770 | if chunk != "" { 771 | if ok, ival := isInt(chunk); ok { 772 | linebuffer = append(linebuffer, ival[:]...) 773 | //fmt.Printf("TOK Integer(%d)\n", int(ival[1])+256*int(ival[2])) 774 | } else { 775 | // Encode strings with high bit (0x80) set 776 | //fmt.Printf("TOK String(%s)\n", strings.ToUpper(chunk)) 777 | data := []byte(strings.ToUpper(chunk)) 778 | for i, v := range data { 779 | data[i] = v | 0x80 780 | } 781 | linebuffer = append(linebuffer, data...) 
782 | } 783 | } 784 | } 785 | 786 | for _, l := range lines { 787 | 788 | l = strings.Trim(l, "\r") 789 | if l == "" { 790 | continue 791 | } 792 | 793 | chunk := "" 794 | inqq := false 795 | tmp := strings.SplitN(l, " ", 2) 796 | ln, _ := strconv.Atoi(tmp[0]) 797 | rest := strings.Trim(tmp[1], " ") 798 | 799 | linebuffer = make([]byte, 3) 800 | 801 | // LINE NUMBER 802 | linebuffer[0x01] = byte(ln & 0xff) 803 | linebuffer[0x02] = byte(ln / 0x100) 804 | 805 | // PROCESS LINE 806 | for _, ch := range rest { 807 | 808 | switch { 809 | case inqq && ch != '"': 810 | linebuffer = append(linebuffer, byte(ch|0x80)) 811 | continue 812 | case ch == ':' && !inqq: 813 | linebuffer = append(linebuffer, 0x03) 814 | continue 815 | case ch == ',' && !inqq: 816 | linebuffer = append(linebuffer, 0x0A) 817 | continue 818 | case ch == ';' && !inqq: 819 | linebuffer = append(linebuffer, 0x45) 820 | continue 821 | case ch == '(' && !inqq: 822 | linebuffer = append(linebuffer, 0x22) 823 | continue 824 | case ch == ')' && !inqq: 825 | linebuffer = append(linebuffer, 0x72) 826 | continue 827 | case ch == '+' && !inqq: 828 | linebuffer = append(linebuffer, 0x12) 829 | continue 830 | case ch == '"': 831 | inqq = !inqq 832 | if inqq { 833 | ch = 0x28 834 | } else { 835 | ch = 0x29 836 | } 837 | linebuffer = append(linebuffer, byte(ch)) 838 | continue 839 | case !inqq && breakingChar(ch): 840 | add(chunk) 841 | chunk = "" 842 | 843 | //linebuffer = append(linebuffer, byte(ch|0x80)) 844 | continue 845 | } 846 | 847 | chunk += string(ch) 848 | code, ok := IntegerReverse[strings.ToUpper(chunk)] 849 | if ok { 850 | //fmt.Printf("TOK Token(%s)\n", chunk) 851 | linebuffer = append(linebuffer, byte(code)) 852 | chunk = "" 853 | } 854 | } 855 | if chunk != "" { 856 | add(chunk) 857 | } 858 | 859 | linebuffer = append(linebuffer, 0x01) // EOL token 860 | 861 | nextAddr := currAddr + len(linebuffer) 862 | linebuffer[0x00] = byte(len(linebuffer)) 863 | currAddr = nextAddr 864 | 865 | buffer = 
append(buffer, linebuffer...) 866 | } 867 | 868 | // Encode file length 869 | // buffer[0] = byte((len(buffer) - 2) % 256) 870 | // buffer[1] = byte((len(buffer) - 2) / 256) 871 | 872 | return buffer 873 | 874 | } 875 | 876 | func tst() { 877 | 878 | // lines := []string{ 879 | // "10 PRINT \"HELLO WORLD!\"", 880 | // "20 GOTO 10", 881 | // } 882 | 883 | // b := IntegerTokenize(lines) 884 | 885 | // Dump(b) 886 | 887 | // os.Exit(1) 888 | 889 | } 890 | -------------------------------------------------------------------------------- /data.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/md5" 5 | "encoding/gob" 6 | "encoding/hex" 7 | "errors" 8 | "fmt" 9 | "os" 10 | "path/filepath" 11 | "runtime" 12 | "strings" 13 | "time" 14 | 15 | "github.com/paleotronic/diskm8/disk" 16 | "github.com/paleotronic/diskm8/loggy" 17 | ) 18 | 19 | type Disk struct { 20 | FullPath string 21 | Filename string 22 | SHA256 string // Sha of whole disk 23 | SHA256Active string // Sha of active sectors/blocks only 24 | Format string 25 | FormatID disk.DiskFormat 26 | Bitmap []bool 27 | Tracks, Sectors, Blocks int 28 | Files DiskCatalog 29 | ActiveSectors DiskSectors 30 | //ActiveBlocks DiskBlocks 31 | InactiveSectors DiskSectors 32 | //InactiveBlocks DiskBlocks 33 | MatchFactor float64 34 | MatchFiles map[*DiskFile]*DiskFile 35 | MissingFiles, ExtraFiles []*DiskFile 36 | IngestMode int 37 | source string 38 | } 39 | 40 | type ByMatchFactor []*Disk 41 | 42 | func (s ByMatchFactor) Len() int { 43 | return len(s) 44 | } 45 | 46 | func (s ByMatchFactor) Swap(i, j int) { 47 | s[i], s[j] = s[j], s[i] 48 | } 49 | 50 | func (s ByMatchFactor) Less(i, j int) bool { 51 | return s[i].MatchFactor < s[j].MatchFactor 52 | } 53 | 54 | type TypeCode int 55 | 56 | const ( 57 | TypeMask_AppleDOS TypeCode = 0x0000 58 | TypeMask_ProDOS TypeCode = 0x0100 59 | TypeMask_Pascal TypeCode = 0x0200 60 | TypeMask_RDOS TypeCode = 0x0300 61 | 
) 62 | 63 | type DiskFile struct { 64 | Filename string 65 | Type string 66 | Ext string 67 | TypeCode TypeCode 68 | SHA256 string 69 | Size int 70 | LoadAddress int 71 | Text []byte 72 | Data []byte 73 | Locked bool 74 | Created time.Time 75 | Modified time.Time 76 | } 77 | 78 | func (d *DiskFile) GetNameAdorned() string { 79 | 80 | var ext string 81 | switch d.TypeCode & 0xff00 { 82 | case TypeMask_AppleDOS: 83 | ext = disk.FileType(d.TypeCode & 0xff).Ext() 84 | case TypeMask_ProDOS: 85 | ext = disk.ProDOSFileType(d.TypeCode & 0xff).Ext() 86 | case TypeMask_RDOS: 87 | ext = disk.RDOSFileType(d.TypeCode & 0xff).Ext() 88 | case TypeMask_Pascal: 89 | ext = disk.PascalFileType(d.TypeCode & 0xff).Ext() 90 | } 91 | 92 | return fmt.Sprintf("%s#0x%.4x.%s", d.Filename, d.LoadAddress, ext) 93 | 94 | } 95 | 96 | func (d *DiskFile) GetName() string { 97 | 98 | var ext string 99 | switch d.TypeCode & 0xff00 { 100 | case TypeMask_AppleDOS: 101 | ext = disk.FileType(d.TypeCode & 0xff).Ext() 102 | case TypeMask_ProDOS: 103 | ext = disk.ProDOSFileType(d.TypeCode & 0xff).Ext() 104 | case TypeMask_RDOS: 105 | ext = disk.RDOSFileType(d.TypeCode & 0xff).Ext() 106 | case TypeMask_Pascal: 107 | ext = disk.PascalFileType(d.TypeCode & 0xff).Ext() 108 | } 109 | 110 | return fmt.Sprintf("%s.%s", d.Filename, ext) 111 | 112 | } 113 | 114 | type DiskCatalog []*DiskFile 115 | type DiskSectors []*DiskSector 116 | type DiskBlocks []*DiskBlock 117 | 118 | type DiskSector struct { 119 | Track int 120 | Sector int 121 | 122 | SHA256 string 123 | 124 | Data []byte 125 | } 126 | 127 | type DiskBlock struct { 128 | Block int 129 | 130 | SHA256 string 131 | } 132 | 133 | func (i Disk) LogBitmap(id int) { 134 | 135 | l := loggy.Get(id) 136 | 137 | if i.Tracks > 0 { 138 | 139 | for t := 0; t < i.Tracks; t++ { 140 | 141 | line := fmt.Sprintf("Track %.2d: ", t) 142 | 143 | for s := 0; s < i.Sectors; s++ { 144 | if i.Bitmap[t*i.Sectors+s] { 145 | line += fmt.Sprintf("%.2x ", s) 146 | } else { 147 | line += 
":: " 148 | } 149 | } 150 | 151 | l.Logf("%s", line) 152 | } 153 | 154 | } else if i.Blocks > 0 { 155 | 156 | tr := i.Blocks / 16 157 | sc := 16 158 | 159 | for t := 0; t < tr; t++ { 160 | 161 | line := fmt.Sprintf("Block %.2d: ", t) 162 | 163 | for s := 0; s < sc; s++ { 164 | if i.Bitmap[t*i.Sectors+s] { 165 | line += fmt.Sprintf("%.2x ", s) 166 | } else { 167 | line += ":: " 168 | } 169 | } 170 | 171 | l.Logf("%s", line) 172 | } 173 | 174 | } 175 | 176 | } 177 | 178 | func (d Disk) GetFilename() string { 179 | 180 | sum := md5.Sum([]byte(d.Filename)) 181 | 182 | // fmt.Printf("checksum: [%s] -> [%s]\n", d.Filename, hex.EncodeToString(sum[:])) 183 | 184 | ff := fmt.Sprintf("%s/%d", strings.Trim(filepath.Dir(d.FullPath), "/"), d.FormatID.ID) + "_" + d.SHA256 + "_" + d.SHA256Active + "_" + hex.EncodeToString(sum[:]) + ".fgp" 185 | 186 | if runtime.GOOS == "windows" { 187 | ff = strings.Replace(ff, ":", "", -1) 188 | ff = strings.Replace(ff, "\\", "/", -1) 189 | } 190 | 191 | return ff 192 | 193 | } 194 | 195 | func (d Disk) WriteToFile(filename string) error { 196 | 197 | // b, err := yaml.Marshal(d) 198 | 199 | // if err != nil { 200 | // return err 201 | // } 202 | l := loggy.Get(0) 203 | 204 | _ = os.MkdirAll(filepath.Dir(filename), 0755) 205 | 206 | f, err := os.Create(filename) 207 | if err != nil { 208 | return err 209 | } 210 | defer f.Close() 211 | enc := gob.NewEncoder(f) 212 | enc.Encode(d) 213 | 214 | l.Logf("Created %s", filename) 215 | 216 | return nil 217 | } 218 | 219 | func (d *Disk) ReadFromFile(filename string) error { 220 | // b, err := ioutil.ReadFile(filename) 221 | // if err != nil { 222 | // return err 223 | // } 224 | // err = yaml.Unmarshal(b, d) 225 | 226 | f, err := os.Open(filename) 227 | if err != nil { 228 | return err 229 | } 230 | defer f.Close() 231 | 232 | dec := gob.NewDecoder(f) 233 | err = dec.Decode(d) 234 | 235 | d.source = filename 236 | 237 | return err 238 | } 239 | 240 | // GetExactBinaryMatches returns disks with the same 
Global SHA256 241 | func (d *Disk) GetExactBinaryMatches(filter []string) []*Disk { 242 | 243 | l := loggy.Get(0) 244 | 245 | var out []*Disk = make([]*Disk, 0) 246 | 247 | exists, matches := existsPattern(*baseName, filter, fmt.Sprintf("%d", d.FormatID)+"_"+d.SHA256+"_*_*.fgp") 248 | if !exists { 249 | return out 250 | } 251 | 252 | for _, m := range matches { 253 | l.Logf(":: Checking %s", m) 254 | if item, err := cache.Get(m); err == nil { 255 | if item.FullPath != d.FullPath { 256 | out = append(out, item) 257 | } 258 | } 259 | } 260 | 261 | return out 262 | } 263 | 264 | // GetActiveSectorBinaryMatches returns disks with the same Active SHA256 265 | func (d *Disk) GetActiveSectorBinaryMatches(filter []string) []*Disk { 266 | 267 | l := loggy.Get(0) 268 | 269 | var out []*Disk = make([]*Disk, 0) 270 | 271 | exists, matches := existsPattern(*baseName, filter, fmt.Sprintf("%d", d.FormatID)+"_*_"+d.SHA256Active+"_*.fgp") 272 | if !exists { 273 | return out 274 | } 275 | 276 | for _, m := range matches { 277 | l.Logf(":: Checking %s", m) 278 | 279 | if item, err := cache.Get(m); err == nil { 280 | if item.FullPath != d.FullPath { 281 | out = append(out, item) 282 | } 283 | } 284 | } 285 | 286 | return out 287 | } 288 | 289 | func (d *Disk) GetFileMap() map[string]*DiskFile { 290 | 291 | out := make(map[string]*DiskFile) 292 | 293 | for _, file := range d.Files { 294 | f := file 295 | out[file.SHA256] = f 296 | } 297 | 298 | return out 299 | 300 | } 301 | 302 | func (d *Disk) GetUtilizationMap() map[string]string { 303 | 304 | out := make(map[string]string) 305 | 306 | if len(d.ActiveSectors) > 0 { 307 | 308 | for _, block := range d.ActiveSectors { 309 | 310 | key := fmt.Sprintf("T%dS%d", block.Track, block.Sector) 311 | out[key] = block.SHA256 312 | 313 | } 314 | 315 | } 316 | 317 | return out 318 | 319 | } 320 | 321 | // CompareChunks returns a value 0-1 322 | func (d *Disk) CompareChunks(b *Disk) (float64, float64, float64, float64) { 323 | 324 | l := 
loggy.Get(0) 325 | 326 | if d.FormatID != b.FormatID { 327 | l.Logf("Trying to compare disks of different types") 328 | return 0, 0, 0, 0 329 | } 330 | 331 | switch d.FormatID.ID { 332 | case disk.DF_RDOS_3: 333 | return d.compareSectorsPositional(b) 334 | case disk.DF_RDOS_32: 335 | return d.compareSectorsPositional(b) 336 | case disk.DF_RDOS_33: 337 | return d.compareSectorsPositional(b) 338 | case disk.DF_PASCAL: 339 | return d.compareBlocksPositional(b) 340 | case disk.DF_DOS_SECTORS_13: 341 | return d.compareSectorsPositional(b) 342 | case disk.DF_DOS_SECTORS_16: 343 | return d.compareSectorsPositional(b) 344 | case disk.DF_PRODOS: 345 | return d.compareBlocksPositional(b) 346 | case disk.DF_PRODOS_800KB: 347 | return d.compareBlocksPositional(b) 348 | } 349 | 350 | return 0, 0, 0, 0 351 | 352 | } 353 | 354 | func (d *Disk) compareSectorsPositional(b *Disk) (float64, float64, float64, float64) { 355 | 356 | l := loggy.Get(0) 357 | 358 | var sameSectors float64 359 | var diffSectors float64 360 | var dNotb float64 361 | var bNotd float64 362 | var emptySectors float64 363 | var dTotal, bTotal float64 364 | 365 | var dmap = d.GetUtilizationMap() 366 | var bmap = b.GetUtilizationMap() 367 | 368 | for t := 0; t < d.FormatID.TPD(); t++ { 369 | 370 | for s := 0; s < d.FormatID.SPT(); s++ { 371 | 372 | key := fmt.Sprintf("T%dS%d", t, s) 373 | 374 | dCk, dEx := dmap[key] 375 | bCk, bEx := bmap[key] 376 | 377 | switch { 378 | case dEx && bEx: 379 | if dCk == bCk { 380 | sameSectors += 1 381 | } else { 382 | diffSectors += 1 383 | } 384 | dTotal += 1 385 | bTotal += 1 386 | case dEx && !bEx: 387 | dNotb += 1 388 | dTotal += 1 389 | case !dEx && bEx: 390 | bNotd += 1 391 | bTotal += 1 392 | default: 393 | emptySectors += 1 394 | } 395 | 396 | } 397 | 398 | } 399 | 400 | l.Debugf("Same Sectors : %f", sameSectors) 401 | l.Debugf("Differing Sectors: %f", diffSectors) 402 | l.Debugf("Not in other disk: %f", dNotb) 403 | l.Debugf("Not in this disk : %f", bNotd) 404 | 405 | // 
return sameSectors / dTotal, sameSectors / bTotal, diffSectors / dTotal, diffSectors / btotal 406 | return sameSectors / dTotal, sameSectors / bTotal, diffSectors / dTotal, diffSectors / bTotal 407 | 408 | } 409 | 410 | func (d *Disk) compareBlocksPositional(b *Disk) (float64, float64, float64, float64) { 411 | 412 | l := loggy.Get(0) 413 | 414 | var sameSectors float64 415 | var diffSectors float64 416 | var dNotb float64 417 | var bNotd float64 418 | var emptySectors float64 419 | var dTotal, bTotal float64 420 | 421 | var dmap = d.GetUtilizationMap() 422 | var bmap = b.GetUtilizationMap() 423 | 424 | for t := 0; t < d.FormatID.BPD(); t++ { 425 | 426 | key := fmt.Sprintf("B%d", t) 427 | 428 | dCk, dEx := dmap[key] 429 | bCk, bEx := bmap[key] 430 | 431 | switch { 432 | case dEx && bEx: 433 | if dCk == bCk { 434 | sameSectors += 1 435 | } else { 436 | diffSectors += 1 437 | } 438 | dTotal += 1 439 | bTotal += 1 440 | case dEx && !bEx: 441 | dNotb += 1 442 | dTotal += 1 443 | case !dEx && bEx: 444 | bNotd += 1 445 | bTotal += 1 446 | default: 447 | emptySectors += 1 448 | } 449 | 450 | } 451 | 452 | l.Debugf("Same Blocks : %f", sameSectors) 453 | l.Debugf("Differing Blocks : %f", diffSectors) 454 | l.Debugf("Not in other disk: %f", dNotb) 455 | l.Debugf("Not in this disk : %f", bNotd) 456 | 457 | // return sameSectors / dTotal, sameSectors / bTotal, diffSectors / dTotal, diffSectors / btotal 458 | return sameSectors / dTotal, sameSectors / bTotal, diffSectors / dTotal, diffSectors / bTotal 459 | 460 | } 461 | 462 | // GetActiveSectorBinaryMatches returns disks with the same Active SHA256 463 | func (d *Disk) GetPartialMatches(filter []string) ([]*Disk, []*Disk, []*Disk) { 464 | 465 | l := loggy.Get(0) 466 | 467 | var superset []*Disk = make([]*Disk, 0) 468 | var subset []*Disk = make([]*Disk, 0) 469 | var identical []*Disk = make([]*Disk, 0) 470 | 471 | exists, matches := existsPattern(*baseName, filter, fmt.Sprintf("%d", d.FormatID)+"_*_*_*.fgp") 472 | if !exists { 
473 | return superset, subset, identical 474 | } 475 | 476 | for _, m := range matches { 477 | //item := &Disk{} 478 | if item, err := cache.Get(m); err == nil { 479 | if item.FullPath != d.FullPath { 480 | // only here if not looking at same disk 481 | l.Logf(":: Checking overlapping data blocks %s", item.Filename) 482 | l.Log("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 483 | dSame, iSame, dDiff, iDiff := d.CompareChunks(item) 484 | l.Logf("== This disk shares %.2f percent of its allocated blocks with %s", dSame*100, item.Filename) 485 | l.Logf("!= This disk differs %.2f percent of its allocate blocks with %s", dDiff*100, item.Filename) 486 | l.Logf("== %s shares %.2f of its blocks with this disk", item.Filename, iSame*100) 487 | l.Logf("!= %s differs %.2f of its blocks with this disk", item.Filename, iDiff*100) 488 | 489 | if dSame == 1 && iSame < 1 { 490 | superset = append(superset, item) 491 | } else if iSame == 1 && dSame < 1 { 492 | subset = append(subset, item) 493 | } else if iSame == 1 && dSame == 1 { 494 | identical = append(identical, item) 495 | } 496 | } 497 | } 498 | } 499 | 500 | return superset, subset, identical 501 | } 502 | 503 | func (d *Disk) GetPartialMatchesWithThreshold(t float64, filter []string) []*Disk { 504 | 505 | l := loggy.Get(0) 506 | 507 | var matchlist []*Disk = make([]*Disk, 0) 508 | 509 | exists, matches := existsPattern(*baseName, filter, fmt.Sprintf("%d", d.FormatID)+"_*_*_*.fgp") 510 | if !exists { 511 | return matchlist 512 | } 513 | 514 | var lastPc int = -1 515 | for i, m := range matches { 516 | //item := &Disk{} 517 | if item, err := cache.Get(m); err == nil { 518 | 519 | pc := int(100 * float64(i) / float64(len(matches))) 520 | 521 | if pc != lastPc { 522 | os.Stderr.WriteString(fmt.Sprintf("Analyzing volumes... 
%d%% ", pc)) 523 | } 524 | 525 | if item.FullPath != d.FullPath { 526 | // only here if not looking at same disk 527 | l.Logf(":: Checking overlapping data blocks %s", item.Filename) 528 | // l.Log("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") 529 | dSame, _, _, _ := d.CompareChunks(item) 530 | // l.Logf("== This disk shares %.2f percent of its allocated blocks with %s", dSame*100, item.Filename) 531 | // l.Logf("!= This disk differs %.2f percent of its allocate blocks with %s", dDiff*100, item.Filename) 532 | // l.Logf("== %s shares %.2f of its blocks with this disk", item.Filename, iSame*100) 533 | // l.Logf("!= %s differs %.2f of its blocks with this disk", item.Filename, iDiff*100) 534 | 535 | item.MatchFactor = dSame 536 | 537 | if dSame >= t { 538 | matchlist = append(matchlist, item) 539 | } 540 | } 541 | 542 | fmt.Print("\r") 543 | lastPc = pc 544 | } 545 | } 546 | 547 | return matchlist 548 | } 549 | 550 | func Aggregate(f func(d *Disk, collector interface{}), collector interface{}, pathfilter []string) { 551 | 552 | l := loggy.Get(0) 553 | 554 | exists, matches := existsPattern(*baseName, pathfilter, "*_*_*_*.fgp") 555 | if !exists { 556 | return 557 | } 558 | 559 | var lastPc int = -1 560 | for i, m := range matches { 561 | 562 | pc := int(100 * float64(i) / float64(len(matches))) 563 | 564 | if pc != lastPc { 565 | os.Stderr.WriteString(fmt.Sprintf("\rAggregating data... 
%d%% ", pc)) 566 | } 567 | 568 | l.Logf(":: Checking %s", m) 569 | //item := &Disk{} 570 | if item, err := cache.Get(m); err == nil { 571 | f(item, collector) 572 | } 573 | 574 | } 575 | 576 | os.Stderr.WriteString("Done.\n") 577 | 578 | return 579 | } 580 | 581 | func (d *Disk) CompareFiles(b *Disk) float64 { 582 | 583 | var sameFiles float64 584 | var missingFiles float64 585 | var extraFiles float64 586 | 587 | var dmap = d.GetFileMap() 588 | var bmap = b.GetFileMap() 589 | 590 | for fileCk, info := range dmap { 591 | 592 | if info.Size == 0 { 593 | continue 594 | } 595 | 596 | binfo, bEx := bmap[fileCk] 597 | 598 | if bEx { 599 | sameFiles += 1 600 | // file match 601 | if b.MatchFiles == nil { 602 | b.MatchFiles = make(map[*DiskFile]*DiskFile) 603 | } 604 | //fmt.Printf("*** %s: %s -> %s\n", b.Filename, binfo.Filename, info.Filename) 605 | b.MatchFiles[binfo] = info 606 | } else { 607 | missingFiles += 1 608 | // file match 609 | if b.MissingFiles == nil { 610 | b.MissingFiles = make([]*DiskFile, 0) 611 | } 612 | //fmt.Printf("*** %s: %s -> %s\n", b.Filename, binfo.Filename, info.Filename) 613 | b.MissingFiles = append(b.MissingFiles, info) 614 | } 615 | 616 | } 617 | 618 | for fileCk, info := range bmap { 619 | 620 | if info.Size == 0 { 621 | continue 622 | } 623 | 624 | _, dEx := dmap[fileCk] 625 | 626 | if !dEx { 627 | extraFiles += 1 628 | // file match 629 | if b.ExtraFiles == nil { 630 | b.ExtraFiles = make([]*DiskFile, 0) 631 | } 632 | //fmt.Printf("*** %s: %s -> %s\n", b.Filename, binfo.Filename, info.Filename) 633 | b.ExtraFiles = append(b.ExtraFiles, info) 634 | } 635 | 636 | } 637 | 638 | // return sameSectors / dTotal, sameSectors / bTotal, diffSectors / dTotal, diffSectors / btotal 639 | return sameFiles / (sameFiles + extraFiles + missingFiles) 640 | 641 | } 642 | 643 | func (d *Disk) GetPartialFileMatchesWithThreshold(t float64, filter []string) []*Disk { 644 | 645 | l := loggy.Get(0) 646 | 647 | var matchlist []*Disk = make([]*Disk, 0) 648 | 
649 | exists, matches := existsPattern(*baseName, filter, "*_*_*_*.fgp") 650 | if !exists { 651 | return matchlist 652 | } 653 | 654 | var lastPc int = -1 655 | for i, m := range matches { 656 | //item := &Disk{} 657 | if item, err := cache.Get(m); err == nil { 658 | 659 | pc := int(100 * float64(i) / float64(len(matches))) 660 | 661 | if pc != lastPc { 662 | os.Stderr.WriteString(fmt.Sprintf("Analyzing volumes... %d%% ", pc)) 663 | } 664 | 665 | if item.FullPath != d.FullPath { 666 | // only here if not looking at same disk 667 | l.Logf(":: Checking overlapping files %s", item.Filename) 668 | dSame := d.CompareFiles(item) 669 | 670 | item.MatchFactor = dSame 671 | 672 | if dSame >= t { 673 | matchlist = append(matchlist, item) 674 | } 675 | } 676 | 677 | fmt.Print("\r") 678 | lastPc = pc 679 | } 680 | } 681 | 682 | return matchlist 683 | } 684 | 685 | func (d *Disk) HasFileSHA256(sha string) (bool, *DiskFile) { 686 | 687 | for _, file := range d.Files { 688 | if sha == file.SHA256 { 689 | return true, file 690 | } 691 | } 692 | 693 | return false, nil 694 | 695 | } 696 | 697 | func (d *Disk) GetFileChecksum(filename string) (bool, string) { 698 | 699 | for _, f := range d.Files { 700 | if strings.ToLower(filename) == strings.ToLower(f.Filename) { 701 | return true, f.SHA256 702 | } 703 | } 704 | 705 | return false, "" 706 | 707 | } 708 | 709 | func (d *Disk) GetFileMatches(filename string, filter []string) []*Disk { 710 | 711 | l := loggy.Get(0) 712 | 713 | var matchlist []*Disk = make([]*Disk, 0) 714 | 715 | exists, matches := existsPattern(*baseName, filter, "*_*_*_*.fgp") 716 | if !exists { 717 | return matchlist 718 | } 719 | 720 | fileexists, SHA256 := d.GetFileChecksum(filename) 721 | if !fileexists { 722 | os.Stderr.WriteString("File does not exist on this volume: " + filename + "\n") 723 | return matchlist 724 | } 725 | 726 | _, srcFile := d.HasFileSHA256(SHA256) 727 | 728 | var lastPc int = -1 729 | for i, m := range matches { 730 | //item := &Disk{} 731 
| if item, err := cache.Get(m); err == nil { 732 | 733 | pc := int(100 * float64(i) / float64(len(matches))) 734 | 735 | if pc != lastPc { 736 | os.Stderr.WriteString(fmt.Sprintf("Analyzing volumes... %d%% ", pc)) 737 | } 738 | 739 | if item.FullPath != d.FullPath { 740 | // only here if not looking at same disk 741 | l.Logf(":: Checking overlapping files %s", item.Filename) 742 | 743 | if ex, file := item.HasFileSHA256(SHA256); ex { 744 | if item.MatchFiles == nil { 745 | item.MatchFiles = make(map[*DiskFile]*DiskFile) 746 | } 747 | item.MatchFiles[srcFile] = file 748 | matchlist = append(matchlist, item) 749 | } 750 | } 751 | 752 | fmt.Print("\r") 753 | lastPc = pc 754 | } 755 | } 756 | 757 | return matchlist 758 | } 759 | 760 | // Gets directory with custom format 761 | func (d *Disk) GetDirectory(format string) string { 762 | out := "" 763 | 764 | for _, file := range d.Files { 765 | 766 | tmp := format 767 | // size 768 | tmp = strings.Replace(tmp, "{size:blocks}", fmt.Sprintf("%3d Blocks", file.Size/256+1), -1) 769 | tmp = strings.Replace(tmp, "{size:kb}", fmt.Sprintf("%4d Kb", file.Size/1024+1), -1) 770 | tmp = strings.Replace(tmp, "{size:b}", fmt.Sprintf("%6d Bytes", file.Size), -1) 771 | tmp = strings.Replace(tmp, "{size}", fmt.Sprintf("%6d", file.Size), -1) 772 | // format 773 | tmp = strings.Replace(tmp, "{filename}", fmt.Sprintf("%-20s", file.Filename), -1) 774 | // type 775 | tmp = strings.Replace(tmp, "{type}", fmt.Sprintf("%-20s", file.Type), -1) 776 | // sha256 777 | tmp = strings.Replace(tmp, "{sha256}", file.SHA256, -1) 778 | // loadaddress 779 | tmp = strings.Replace(tmp, "{loadaddr}", fmt.Sprintf("0x.%4X", file.LoadAddress), -1) 780 | 781 | out += tmp + "\n" 782 | } 783 | 784 | return out 785 | } 786 | 787 | type CacheContext int 788 | 789 | const ( 790 | CC_All CacheContext = iota 791 | CC_ActiveSectors 792 | CC_AllSectors 793 | CC_Files 794 | ) 795 | 796 | type DiskMetaDataCache struct { 797 | ctx CacheContext 798 | Disks map[string]*Disk 799 
| } 800 | 801 | var cache = NewCache(CC_All, "") 802 | 803 | func (c *DiskMetaDataCache) Get(filename string) (*Disk, error) { 804 | cached, ok := c.Disks[filename] 805 | if ok { 806 | return cached, nil 807 | } 808 | item := &Disk{} 809 | if err := item.ReadFromFile(filename); err == nil { 810 | c.Disks[filename] = item 811 | return item, nil 812 | } 813 | return nil, errors.New("Not found") 814 | } 815 | 816 | func (c *DiskMetaDataCache) Put(filename string, item *Disk) { 817 | c.Disks[filename] = item 818 | } 819 | 820 | func NewCache(ctx CacheContext, pattern string) *DiskMetaDataCache { 821 | 822 | cache := &DiskMetaDataCache{ 823 | ctx: ctx, 824 | Disks: make(map[string]*Disk), 825 | } 826 | 827 | return cache 828 | } 829 | 830 | func CreateCache(ctx CacheContext, pattern string, filter []string) *DiskMetaDataCache { 831 | 832 | cache := &DiskMetaDataCache{ 833 | ctx: ctx, 834 | Disks: make(map[string]*Disk), 835 | } 836 | 837 | exists, matches := existsPattern(*baseName, filter, pattern) 838 | if !exists { 839 | return cache 840 | } 841 | 842 | var lastPc int = -1 843 | for i, m := range matches { 844 | item := &Disk{} 845 | if err := item.ReadFromFile(m); err == nil { 846 | 847 | pc := int(100 * float64(i) / float64(len(matches))) 848 | 849 | if pc != lastPc { 850 | os.Stderr.WriteString(fmt.Sprintf("Caching data... 
// exists reports whether path can be stat'ed.
//
// Any os.Stat failure counts as "does not exist": a missing file, but also,
// for example, a permission error on a parent directory.
func exists(path string) bool {
	_, err := os.Stat(path)
	return err == nil
}
-------------------------------------------------------------------------------- 1 | package disk 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "regexp" 7 | "strings" 8 | ) 9 | 10 | type FileType byte 11 | 12 | const ( 13 | FileTypeTXT FileType = 0x00 14 | FileTypeINT FileType = 0x01 15 | FileTypeAPP FileType = 0x02 16 | FileTypeBIN FileType = 0x04 17 | FileTypeS FileType = 0x08 18 | FileTypeREL FileType = 0x10 19 | FileTypeA FileType = 0x20 20 | FileTypeB FileType = 0x40 21 | ) 22 | 23 | var AppleDOSTypeMap = map[FileType][2]string{ 24 | 0x00: [2]string{"TXT", "ASCII Text"}, 25 | 0x01: [2]string{"INT", "Integer Basic Program"}, 26 | 0x02: [2]string{"BAS", "Applesoft Basic Program"}, 27 | 0x04: [2]string{"BIN", "Binary File"}, 28 | 0x08: [2]string{"S", "S File Type"}, 29 | 0x10: [2]string{"REL", "Relocatable Object Code"}, 30 | 0x20: [2]string{"A", "A File Type"}, 31 | 0x40: [2]string{"B", "B File Type"}, 32 | } 33 | 34 | func (ft FileType) String() string { 35 | 36 | info, ok := AppleDOSTypeMap[ft] 37 | if ok { 38 | return info[1] 39 | } 40 | 41 | return "Unknown" 42 | } 43 | 44 | func AppleDOSFileTypeFromExt(ext string) FileType { 45 | for ft, info := range AppleDOSTypeMap { 46 | if strings.ToUpper(ext) == info[0] { 47 | return ft 48 | } 49 | } 50 | return 0x04 51 | } 52 | 53 | func (ft FileType) Ext() string { 54 | 55 | info, ok := AppleDOSTypeMap[ft] 56 | if ok { 57 | return info[0] 58 | } 59 | 60 | return "BIN" 61 | } 62 | 63 | type FileDescriptor struct { 64 | Data []byte 65 | trackid, sectorid int 66 | sectoroffset int 67 | } 68 | 69 | func (fd *FileDescriptor) SetData(data []byte, t, s, o int) { 70 | 71 | fd.trackid = t 72 | fd.sectorid = s 73 | fd.sectoroffset = o 74 | 75 | if fd.Data == nil && len(data) == 35 { 76 | fd.Data = data 77 | } 78 | 79 | for i, v := range data { 80 | fd.Data[i] = v 81 | } 82 | } 83 | 84 | func (fd *FileDescriptor) Publish(dsk *DSKWrapper) error { 85 | 86 | err := dsk.Seek(fd.trackid, fd.sectorid) 87 | if err != nil { 88 | return 
// AsciiToPoke returns b with its high bit set. Bytes outside the range
// 32..127 are first replaced by a space, so they come back as 0xA0.
func AsciiToPoke(b byte) byte {
	const highBit = 0x80

	if inRange := b >= 32 && b <= 127; !inRange {
		return ' ' | highBit
	}
	return b | highBit
}
for _, v := range r { 180 | ch := PokeToAscii(uint(v), false) 181 | s = s + string(ch) 182 | } 183 | 184 | s = strings.ToLower(strings.Trim(s, " ")) 185 | 186 | return s 187 | } 188 | 189 | func (fd *FileDescriptor) NameBytes() []byte { 190 | return fd.Data[0x03:0x21] 191 | } 192 | 193 | func (fd *FileDescriptor) NameOK() bool { 194 | for _, v := range fd.NameBytes() { 195 | if v < 32 { 196 | return false 197 | } 198 | } 199 | 200 | return true 201 | } 202 | 203 | func (fd *FileDescriptor) TotalSectors() int { 204 | return int(fd.Data[0x21]) + 256*int(fd.Data[0x22]) 205 | } 206 | 207 | func (fd *FileDescriptor) SetTotalSectors(v int) { 208 | fmt.Printf("Call to set sector count to %d\n", v) 209 | fd.Data[0x21] = byte(v & 0xff) 210 | fd.Data[0x22] = byte(v / 0x100) 211 | fmt.Printf("Sector count bytes are %d, %d\n", fd.Data[0x21], fd.Data[0x22]) 212 | } 213 | 214 | func (fd *FileDescriptor) SetTrackSectorListStart(t, s int) { 215 | fd.Data[0] = byte(t) 216 | fd.Data[1] = byte(s) 217 | } 218 | 219 | type VTOC struct { 220 | Data [256]byte 221 | t, s int 222 | } 223 | 224 | func (fd *VTOC) SetData(data []byte, t, s int) { 225 | fd.t, fd.s = t, s 226 | for i, v := range data { 227 | fd.Data[i] = v 228 | } 229 | } 230 | 231 | func (fd *VTOC) GetCatalogStart() (int, int) { 232 | return int(fd.Data[1]), int(fd.Data[2]) 233 | } 234 | 235 | func (fd *VTOC) GetDOSVersion() byte { 236 | return fd.Data[3] 237 | } 238 | 239 | func (fd *VTOC) GetVolumeID() byte { 240 | return fd.Data[6] 241 | } 242 | 243 | func (fd *VTOC) GetMaxTSPairsPerSector() int { 244 | return int(fd.Data[0x27]) 245 | } 246 | 247 | func (fd *VTOC) GetTracks() int { 248 | return int(fd.Data[0x34]) 249 | } 250 | 251 | func (fd *VTOC) GetSectors() int { 252 | return int(fd.Data[0x35]) 253 | } 254 | 255 | func (fd *VTOC) GetTrackOrder() int { 256 | return int(fd.Data[0x31]) 257 | } 258 | 259 | func (fd *VTOC) BytesPerSector() int { 260 | size := int(fd.Data[0x36]) + 256*int(fd.Data[0x37]) 261 | if size < 256 { 
262 | size = 256 263 | } 264 | return size 265 | } 266 | 267 | func (fd *VTOC) IsTSFree(t, s int) bool { 268 | offset := 0x38 + t*4 269 | if s < 8 { 270 | offset++ 271 | } 272 | bitmask := byte(1 << uint(s&0x7)) 273 | 274 | return (fd.Data[offset]&bitmask != 0) 275 | } 276 | 277 | // SetTSFree marks a T/S free or not 278 | func (fd *VTOC) SetTSFree(t, s int, b bool) { 279 | offset := 0x38 + t*4 280 | if s < 8 { 281 | offset++ 282 | } 283 | bitmask := byte(1 << uint(s&0x7)) 284 | clrmask := 0xff ^ bitmask 285 | 286 | v := fd.Data[offset] 287 | if b { 288 | v |= bitmask 289 | } else { 290 | v &= clrmask 291 | } 292 | 293 | fd.Data[offset] = v 294 | } 295 | 296 | func (fd *VTOC) Publish(dsk *DSKWrapper) error { 297 | err := dsk.Seek(fd.t, fd.s) 298 | if err != nil { 299 | return err 300 | } 301 | 302 | dsk.Write(fd.Data[:]) 303 | 304 | return nil 305 | } 306 | 307 | func (fd *VTOC) DumpMap() { 308 | 309 | fmt.Printf("Disk has %d tracks and %d sectors per track, %d bytes per sector (ordering %d)...\n", fd.GetTracks(), fd.GetSectors(), fd.BytesPerSector(), fd.GetTrackOrder()) 310 | fmt.Printf("Volume ID is %d\n", fd.GetVolumeID()) 311 | ct, cs := fd.GetCatalogStart() 312 | fmt.Printf("Catalog starts at T%d, S%d\n", ct, cs) 313 | 314 | tcount := fd.GetTracks() 315 | scount := fd.GetSectors() 316 | 317 | for t := 0; t < tcount; t++ { 318 | fmt.Printf("TRACK %.2x: |", t) 319 | for s := 0; s < scount; s++ { 320 | if fd.IsTSFree(t, s) { 321 | fmt.Print(".") 322 | } else { 323 | fmt.Print("X") 324 | } 325 | } 326 | fmt.Println("|") 327 | } 328 | 329 | } 330 | 331 | func (dsk *DSKWrapper) IsAppleDOS() (bool, DiskFormat, SectorOrder) { 332 | 333 | oldFormat := dsk.Format 334 | oldLayout := dsk.Layout 335 | 336 | defer func() { 337 | dsk.Format = oldFormat 338 | dsk.Layout = oldLayout 339 | }() 340 | 341 | if len(dsk.Data) == STD_DISK_BYTES { 342 | 343 | layouts := []SectorOrder{SectorOrderDOS33, SectorOrderDOS33Alt, SectorOrderProDOS, SectorOrderProDOSLinear} 344 | 345 | for _, 
l := range layouts { 346 | 347 | dsk.Layout = l 348 | 349 | vtoc, err := dsk.AppleDOSGetVTOC() 350 | if err != nil { 351 | continue 352 | } 353 | 354 | if vtoc.GetTracks() != 35 || vtoc.GetSectors() != 16 { 355 | continue 356 | } 357 | 358 | _, files, err := dsk.AppleDOSGetCatalog("*") 359 | if err != nil { 360 | continue 361 | } 362 | 363 | if len(files) > 0 { 364 | return true, GetDiskFormat(DF_DOS_SECTORS_16), l 365 | } 366 | 367 | } 368 | 369 | } else if len(dsk.Data) == STD_DISK_BYTES_OLD { 370 | 371 | layouts := []SectorOrder{SectorOrderDOS33, SectorOrderDOS33Alt, SectorOrderProDOS, SectorOrderProDOSLinear} 372 | 373 | dsk.Format = GetDiskFormat(DF_DOS_SECTORS_13) 374 | 375 | for _, l := range layouts { 376 | dsk.Layout = l 377 | 378 | vtoc, err := dsk.AppleDOSGetVTOC() 379 | if err != nil { 380 | continue 381 | } 382 | 383 | if vtoc.GetTracks() != 35 || vtoc.GetSectors() != 13 { 384 | continue 385 | } 386 | 387 | _, files, err := dsk.AppleDOSGetCatalog("*") 388 | if err != nil { 389 | continue 390 | } 391 | 392 | if len(files) > 0 { 393 | return true, GetDiskFormat(DF_DOS_SECTORS_13), l 394 | } 395 | 396 | } 397 | 398 | } 399 | 400 | return false, oldFormat, oldLayout 401 | 402 | } 403 | 404 | func (d *DSKWrapper) AppleDOSReadFileRaw(fd FileDescriptor) (int, int, []byte, error) { 405 | 406 | data, e := d.AppleDOSReadFileSectors(fd, -1) 407 | 408 | if e != nil || len(data) == 0 { 409 | return 0, 0, data, e 410 | } 411 | 412 | switch fd.Type() { 413 | case FileTypeINT: 414 | l := int(data[0]) + 256*int(data[1]) 415 | if l+2 > len(data) { 416 | l = len(data) - 2 417 | } 418 | return l, 0x801, data[2 : 2+l], nil 419 | case FileTypeAPP: 420 | l := int(data[0]) + 256*int(data[1]) 421 | if l+2 > len(data) { 422 | l = len(data) - 2 423 | } 424 | return l, 0x801, data[2 : 2+l], nil 425 | case FileTypeTXT: 426 | return len(data), 0x0000, data, nil 427 | case FileTypeBIN: 428 | addr := int(data[0]) + 256*int(data[1]) 429 | l := int(data[2]) + 256*int(data[3]) 430 | if 
l+4 > len(data) { 431 | l = len(data) - 4 432 | } 433 | //fmt.Printf("%x, %x, %x\n", l, addr, len(data)) 434 | return l, addr, data[4 : 4+l], nil 435 | default: 436 | l := int(data[0]) + 256*int(data[1]) 437 | if l+2 > len(data) { 438 | l = len(data) - 2 439 | } 440 | return l, 0, data[2 : 2+l], nil 441 | } 442 | 443 | } 444 | 445 | func (d *DSKWrapper) AppleDOSGetVTOC() (*VTOC, error) { 446 | e := d.Seek(17, 0) 447 | if e != nil { 448 | return nil, e 449 | } 450 | data := d.Read() 451 | 452 | vtoc := &VTOC{} 453 | vtoc.SetData(data, 17, 0) 454 | return vtoc, nil 455 | } 456 | 457 | func (dsk *DSKWrapper) AppleDOSUsedBitmap() ([]bool, error) { 458 | 459 | var out []bool = make([]bool, dsk.Format.TPD()*dsk.Format.SPT()) 460 | 461 | _, files, err := dsk.AppleDOSGetCatalog("*") 462 | if err != nil { 463 | return out, err 464 | } 465 | 466 | for _, f := range files { 467 | tslist, err := dsk.AppleDOSGetFileSectors(f, 0) 468 | if err == nil { 469 | for _, pair := range tslist { 470 | track := pair[0] 471 | sector := pair[1] 472 | out[track*dsk.Format.SPT()+sector] = true 473 | } 474 | } 475 | } 476 | 477 | return out, nil 478 | 479 | } 480 | 481 | func (d *DSKWrapper) AppleDOSGetCatalog(pattern string) (*VTOC, []FileDescriptor, error) { 482 | 483 | var files []FileDescriptor 484 | var e error 485 | var vtoc *VTOC 486 | 487 | vtoc, e = d.AppleDOSGetVTOC() 488 | if e != nil { 489 | return vtoc, files, e 490 | } 491 | 492 | count := 0 493 | ct, cs := vtoc.GetCatalogStart() 494 | 495 | e = d.Seek(ct, cs) 496 | if e != nil { 497 | return vtoc, files, e 498 | } 499 | 500 | data := d.Read() 501 | 502 | var re *regexp.Regexp 503 | if pattern != "" { 504 | patterntmp := strings.Replace(pattern, ".", "[.]", -1) 505 | patterntmp = strings.Replace(patterntmp, "*", ".*", -1) 506 | patterntmp = "(?i)^" + patterntmp + "$" 507 | re = regexp.MustCompile(patterntmp) 508 | } 509 | 510 | for e == nil && count < 105 { 511 | slot := count % 7 512 | pos := 0x0b + 35*slot 513 | 514 | fd := 
FileDescriptor{} 515 | fd.SetData(data[pos:pos+35], ct, cs, pos) 516 | 517 | var skipname bool = false 518 | if re != nil { 519 | skipname = !re.MatchString(fd.Name()) 520 | } 521 | 522 | if fd.Data[0] != 0xff && fd.Data[0] != 0x00 && fd.Type().String() != "Unknown" && !skipname { 523 | files = append(files, fd) 524 | } 525 | count++ 526 | if count%7 == 0 { 527 | // move to next catalog sector 528 | ct = int(data[1]) 529 | cs = int(data[2]) 530 | if ct == 0 { 531 | return vtoc, files, nil 532 | } 533 | e = d.Seek(ct, cs) 534 | if e != nil { 535 | return vtoc, files, e 536 | } 537 | data = d.Read() 538 | } 539 | } 540 | 541 | return vtoc, files, nil 542 | } 543 | 544 | func (d *DSKWrapper) AppleDOSGetFileSectors(fd FileDescriptor, maxblocks int) ([][2]int, error) { 545 | var e error 546 | var data []byte 547 | tl, sl := fd.GetTrackSectorListStart() 548 | 549 | // var tracks []int 550 | // var sectors []int 551 | 552 | var tslist [][2]int 553 | 554 | var tsmap = make(map[int]int) 555 | 556 | for e == nil && (tl != 0 || sl != 0) { 557 | // Get TS List 558 | e = d.Seek(tl, sl) 559 | if e != nil { 560 | return tslist, e 561 | } 562 | data = d.Read() 563 | 564 | //fmt.Printf("DEBUG: T/S List follows from T%d, S%d:\n", tl, sl) 565 | //Dump(data) 566 | 567 | ptr := 0x0c 568 | for ptr < 0x100 { 569 | // check entry 570 | t, s := int(data[ptr]), int(data[ptr+1]) 571 | 572 | if t == 0 && s == 0 || t >= d.Format.TPD() || s >= d.Format.SPT() { 573 | //fmt.Println("BREAK ptr =", ptr, len(tracks)) 574 | break 575 | } 576 | 577 | //fmt.Printf("File block at T%d, S%d\n", t, s) 578 | 579 | // tracks = append(tracks, t) 580 | // sectors = append(sectors, s) 581 | 582 | tslist = append(tslist, [2]int{t, s}) 583 | 584 | // next entry 585 | ptr += 2 586 | } 587 | 588 | // get next TS List block 589 | ntl, nsl := int(data[1]), int(data[2]) 590 | if _, ex := tsmap[100*ntl+nsl]; ex { 591 | //fmt.Printf("circular ts list") 592 | break 593 | } 594 | 595 | tl, sl = ntl, nsl 596 | 597 | 
tsmap[100*tl+sl] = 1 598 | 599 | //fmt.Printf("Next Track Sector list is at T%d, S%d (%d)\n", tl, sl, len(tracks)) 600 | 601 | } 602 | 603 | return tslist, nil 604 | } 605 | 606 | func (d *DSKWrapper) AppleDOSReadFileSectors(fd FileDescriptor, maxblocks int) ([]byte, error) { 607 | var e error 608 | var data []byte 609 | var file []byte 610 | tl, sl := fd.GetTrackSectorListStart() 611 | 612 | var tracks []int 613 | var sectors []int 614 | 615 | var tsmap = make(map[int]int) 616 | 617 | for e == nil && (tl != 0 || sl != 0) { 618 | // Get TS List 619 | e = d.Seek(tl, sl) 620 | if e != nil { 621 | return file, e 622 | } 623 | data = d.Read() 624 | 625 | //fmt.Printf("DEBUG: T/S List follows from T%d, S%d:\n", tl, sl) 626 | //Dump(data) 627 | 628 | ptr := 0x0c 629 | for ptr < 0x100 { 630 | // check entry 631 | t, s := int(data[ptr]), int(data[ptr+1]) 632 | 633 | if t == 0 && s == 0 || t >= d.Format.TPD() || s >= d.Format.SPT() { 634 | //fmt.Println("BREAK ptr =", ptr, len(tracks)) 635 | break 636 | } 637 | 638 | //fmt.Printf("File block at T%d, S%d\n", t, s) 639 | 640 | tracks = append(tracks, t) 641 | sectors = append(sectors, s) 642 | 643 | // next entry 644 | ptr += 2 645 | } 646 | 647 | // get next TS List block 648 | ntl, nsl := int(data[1]), int(data[2]) 649 | if _, ex := tsmap[100*ntl+nsl]; ex { 650 | //fmt.Printf("circular ts list") 651 | break 652 | } 653 | 654 | tl, sl = ntl, nsl 655 | 656 | tsmap[100*tl+sl] = 1 657 | 658 | //fmt.Printf("Next Track Sector list is at T%d, S%d (%d)\n", tl, sl, len(tracks)) 659 | 660 | } 661 | 662 | // Here got T/S list 663 | //fmt.Println("READING FILE") 664 | blocksread := 0 665 | for i, t := range tracks { 666 | s := sectors[i] 667 | 668 | //fmt.Printf("TS Fetch #%d: Track %d, %d\n", i, t, s) 669 | 670 | e = d.Seek(t, s) 671 | if e != nil { 672 | return file, e 673 | } 674 | c := d.Read() 675 | 676 | //Dump(c) 677 | 678 | file = append(file, c...) 
679 | blocksread++ 680 | 681 | if maxblocks != -1 && blocksread >= maxblocks { 682 | break 683 | } 684 | } 685 | 686 | return file, nil 687 | } 688 | 689 | func (d *DSKWrapper) AppleDOSGetTSListSectors(fd FileDescriptor, maxblocks int) ([][2]int, error) { 690 | var e error 691 | var data []byte 692 | 693 | tl, sl := fd.GetTrackSectorListStart() 694 | 695 | var tslist [][2]int 696 | 697 | var tsmap = make(map[int]int) 698 | 699 | for e == nil && (tl != 0 || sl != 0) { 700 | // Get TS List 701 | e = d.Seek(tl, sl) 702 | if e != nil { 703 | return tslist, e 704 | } 705 | data = d.Read() 706 | 707 | tslist = append(tslist, [2]int{tl, sl}) 708 | 709 | // get next TS List block 710 | ntl, nsl := int(data[1]), int(data[2]) 711 | if _, ex := tsmap[100*ntl+nsl]; ex { 712 | break 713 | } 714 | 715 | tl, sl = ntl, nsl 716 | 717 | tsmap[100*tl+sl] = 1 718 | 719 | } 720 | 721 | return tslist, nil 722 | } 723 | 724 | func (d *DSKWrapper) AppleDOSReadFile(fd FileDescriptor) (int, int, []byte, error) { 725 | 726 | data, e := d.AppleDOSReadFileSectors(fd, -1) 727 | 728 | if e != nil { 729 | return 0, 0, data, e 730 | } 731 | 732 | switch fd.Type() { 733 | case FileTypeINT: 734 | l := int(data[0]) + 256*int(data[1]) 735 | return l, 0x801, IntegerDetoks(data[2 : 2+l]), nil 736 | case FileTypeAPP: 737 | l := int(data[0]) + 256*int(data[1]) 738 | return l, 0x801, ApplesoftDetoks(data[2 : 2+l]), nil 739 | case FileTypeTXT: 740 | return len(data), 0x0000, data, nil 741 | case FileTypeBIN: 742 | addr := int(data[0]) + 256*int(data[1]) 743 | l := int(data[2]) + 256*int(data[3]) 744 | //fmt.Printf("%x, %x, %x\n", l, addr, len(data)) 745 | return l, addr, data[4 : 4+l], nil 746 | default: 747 | l := int(data[0]) + 256*int(data[1]) 748 | return l, 0, data[2 : 2+l], nil 749 | } 750 | 751 | } 752 | 753 | // AppleDOSGetFreeSectors tries to find free sectors for certain size file... 754 | // Remember, we need space for the T/S list as well... 
755 | func (dsk *DSKWrapper) AppleDOSGetFreeSectors(size int) ([][2]int, [][2]int, error) { 756 | 757 | needed := make([][2]int, 0) 758 | vtoc, err := dsk.AppleDOSGetVTOC() 759 | if err != nil { 760 | return nil, nil, err 761 | } 762 | 763 | catTrack, _ := vtoc.GetCatalogStart() 764 | 765 | // needed: 766 | // size/256 + 1 for data 767 | // 1 for T/S list 768 | dataBlocks := (size / 256) + 1 769 | tsListBlocks := (dataBlocks / vtoc.GetMaxTSPairsPerSector()) + 1 770 | totalBlocks := tsListBlocks + dataBlocks 771 | 772 | for t := dsk.Format.TPD() - 1; t >= 0; t-- { 773 | 774 | if t == catTrack { 775 | continue // skip catalog track 776 | } 777 | 778 | for s := dsk.Format.SPT() - 1; s >= 0; s-- { 779 | 780 | if len(needed) >= totalBlocks { 781 | break 782 | } 783 | 784 | if vtoc.IsTSFree(t, s) { 785 | needed = append(needed, [2]int{t, s}) 786 | } 787 | } 788 | 789 | } 790 | 791 | if len(needed) >= totalBlocks { 792 | return needed[:tsListBlocks], needed[tsListBlocks:], nil 793 | } 794 | 795 | return nil, nil, errors.New("Insufficent space") 796 | 797 | } 798 | 799 | func (d *DSKWrapper) AppleDOSNextFreeCatalogEntry(name string) (*FileDescriptor, error) { 800 | 801 | var e error 802 | var vtoc *VTOC 803 | 804 | vtoc, e = d.AppleDOSGetVTOC() 805 | if e != nil { 806 | return nil, e 807 | } 808 | 809 | count := 0 810 | ct, cs := vtoc.GetCatalogStart() 811 | 812 | e = d.Seek(ct, cs) 813 | if e != nil { 814 | return nil, e 815 | } 816 | 817 | data := d.Read() 818 | 819 | for e == nil && count < 105 { 820 | fmt.Printf("AppleDOSNextFreeCatalogEntry: checking entry %d\n", count) 821 | slot := count % 7 822 | pos := 0x0b + 35*slot 823 | 824 | fd := FileDescriptor{} 825 | fd.SetData(data[pos:pos+35], ct, cs, pos) 826 | 827 | if fd.IsUnused() { 828 | fmt.Printf("Is unused\n") 829 | return &fd, nil 830 | } else if name != "" && strings.ToLower(fd.NameUnadorned()) == strings.ToLower(name) { 831 | fmt.Printf("Name match found\n") 832 | return &fd, nil 833 | } 834 | count++ 835 | if 
count%7 == 0 { 836 | // move to next catalog sector 837 | ct = int(data[1]) 838 | cs = int(data[2]) 839 | if ct == 0 { 840 | return nil, nil 841 | } 842 | e = d.Seek(ct, cs) 843 | if e != nil { 844 | return nil, e 845 | } 846 | data = d.Read() 847 | } 848 | } 849 | 850 | return nil, errors.New("No free entry") 851 | } 852 | 853 | func (d *DSKWrapper) AppleDOSNamedCatalogEntry(name string) (*FileDescriptor, error) { 854 | 855 | var e error 856 | var vtoc *VTOC 857 | 858 | vtoc, e = d.AppleDOSGetVTOC() 859 | if e != nil { 860 | return nil, e 861 | } 862 | 863 | count := 0 864 | ct, cs := vtoc.GetCatalogStart() 865 | 866 | e = d.Seek(ct, cs) 867 | if e != nil { 868 | return nil, e 869 | } 870 | 871 | data := d.Read() 872 | 873 | for e == nil && count < 105 { 874 | slot := count % 7 875 | pos := 0x0b + 35*slot 876 | 877 | fd := FileDescriptor{} 878 | fd.SetData(data[pos:pos+35], ct, cs, pos) 879 | 880 | //fmt.Printf("FILE NAME CHECK [%s] vs [%s]\n", strings.ToLower(fd.NameUnadorned()), strings.ToLower(name)) 881 | 882 | if name != "" && strings.ToLower(fd.NameUnadorned()) == strings.ToLower(name) { 883 | return &fd, nil 884 | } 885 | count++ 886 | if count%7 == 0 { 887 | // move to next catalog sector 888 | ct = int(data[1]) 889 | cs = int(data[2]) 890 | if ct == 0 { 891 | return nil, errors.New("Not found") 892 | } 893 | e = d.Seek(ct, cs) 894 | if e != nil { 895 | return nil, e 896 | } 897 | data = d.Read() 898 | } 899 | } 900 | 901 | return nil, errors.New("Not found") 902 | } 903 | 904 | func (dsk *DSKWrapper) AppleDOSWriteFile(name string, kind FileType, data []byte, loadAddr int) error { 905 | 906 | name = strings.ToUpper(name) 907 | 908 | vtoc, err := dsk.AppleDOSGetVTOC() 909 | if err != nil { 910 | return err 911 | } 912 | 913 | fmt.Printf("Writing file with type %s\n", kind.Ext()) 914 | 915 | if kind & FileTypeBIN != 0 { 916 | l := len(data) 917 | fmt.Printf("Length is %d\n", l) 918 | header := []byte{byte(l % 256), byte(l / 256)} 919 | data = append(header, 
data...) 920 | } 921 | 922 | if kind & FileTypeTXT == 0 { 923 | header := []byte{byte(loadAddr % 256), byte(loadAddr / 256)} 924 | data = append(header, data...) 925 | } 926 | 927 | // try get catalog entry - delete existing if match found 928 | fd, err := dsk.AppleDOSNamedCatalogEntry(name) 929 | if err == nil { 930 | if kind != fd.Type() { 931 | return errors.New("File type mismatch") 932 | } else { 933 | // need to delete this file... 934 | err = dsk.AppleDOSRemoveFile(fd) 935 | if err != nil { 936 | return err 937 | } 938 | // reread vtoc here 939 | vtoc, err = dsk.AppleDOSGetVTOC() 940 | if err != nil { 941 | return err 942 | } 943 | } 944 | } else { 945 | fd, err = dsk.AppleDOSNextFreeCatalogEntry(name) 946 | if err != nil { 947 | return err 948 | } 949 | } 950 | 951 | // 1st: check we have sufficient space... 952 | tsBlocks, dataBlocks, err := dsk.AppleDOSGetFreeSectors(len(data)) 953 | if err != nil { 954 | return err 955 | } 956 | 957 | // 2nd: check we can get a free catalog entry 958 | sectorCount := len(dataBlocks) + len(tsBlocks) 959 | 960 | // 3rd: Write the datablocks 961 | var block int = 0 962 | for len(data) > 0 { 963 | 964 | max := STD_BYTES_PER_SECTOR 965 | if len(data) < STD_BYTES_PER_SECTOR { 966 | max = len(data) 967 | } 968 | chunk := data[:max] 969 | // Pad final sector with 0x00 bytes 970 | for len(chunk) < STD_BYTES_PER_SECTOR { 971 | chunk = append(chunk, 0x00) 972 | } 973 | data = data[max:] 974 | 975 | pair := dataBlocks[block] 976 | 977 | track, sector := pair[0], pair[1] 978 | 979 | err = dsk.Seek(track, sector) 980 | if err != nil { 981 | return err 982 | } 983 | dsk.Write(chunk) 984 | 985 | block++ 986 | 987 | } 988 | 989 | // 4th: Write the T/S List 990 | offset := 0 991 | for blockIdx, block := range tsBlocks { 992 | listTrack, listSector := block[0], block[1] 993 | nextTrack, nextSector := 0, 0 994 | if blockIdx < len(tsBlocks)-1 { 995 | nextTrack, nextSector = tsBlocks[blockIdx+1][0], tsBlocks[blockIdx+1][1] 996 | } 997 | 998 | 
buffer := make([]byte, STD_BYTES_PER_SECTOR) 999 | 1000 | // header 1001 | buffer[0x01] = byte(nextTrack) 1002 | buffer[0x02] = byte(nextSector) 1003 | buffer[0x05] = byte(offset & 0xff) 1004 | buffer[0x06] = byte(offset / 0x100) 1005 | 1006 | // 1007 | count := vtoc.GetMaxTSPairsPerSector() 1008 | if offset+count >= len(dataBlocks) { 1009 | count = len(dataBlocks) - offset 1010 | } 1011 | 1012 | for i := 0; i < count; i++ { 1013 | pos := 0x0c + i*2 1014 | buffer[pos+0x00] = byte(dataBlocks[offset+i][0]) 1015 | buffer[pos+0x01] = byte(dataBlocks[offset+i][1]) 1016 | vtoc.SetTSFree(dataBlocks[offset+i][0], dataBlocks[offset+i][1], false) 1017 | } 1018 | 1019 | // Write the sector 1020 | err = dsk.Seek(listTrack, listSector) 1021 | if err != nil { 1022 | return err 1023 | } 1024 | dsk.Write(buffer) 1025 | vtoc.SetTSFree(listTrack, listSector, false) 1026 | } 1027 | 1028 | err = vtoc.Publish(dsk) 1029 | if err != nil { 1030 | return err 1031 | } 1032 | 1033 | // 5th and finally: Let's make that catalog entry 1034 | fd.SetName(name) 1035 | fd.SetTrackSectorListStart(tsBlocks[0][0], tsBlocks[0][1]) 1036 | fd.SetType(kind) 1037 | fd.SetTotalSectors(sectorCount) 1038 | fd.Publish(dsk) 1039 | 1040 | fmt.Printf("Added file %s (kind %d)\n", name, kind) 1041 | 1042 | return nil 1043 | 1044 | } 1045 | 1046 | func (d *DSKWrapper) AppleDOSRemoveFile(fd *FileDescriptor) error { 1047 | 1048 | vtoc, err := d.AppleDOSGetVTOC() 1049 | if err != nil { 1050 | return err 1051 | } 1052 | 1053 | if fd.IsUnused() { 1054 | return errors.New("File does not exist") 1055 | } 1056 | 1057 | tsBlocks, e := d.AppleDOSGetTSListSectors(*fd, -1) 1058 | if e != nil { 1059 | return e 1060 | } 1061 | 1062 | dataBlocks, e := d.AppleDOSGetFileSectors(*fd, -1) 1063 | if e != nil { 1064 | return e 1065 | } 1066 | 1067 | for _, pair := range dataBlocks { 1068 | vtoc.SetTSFree(pair[0], pair[1], true) 1069 | } 1070 | 1071 | for _, pair := range tsBlocks { 1072 | vtoc.SetTSFree(pair[0], pair[1], true) 1073 | } 
1074 | 1075 | vtoc.Publish(d) 1076 | 1077 | fd.Data[0x00] = 0xff 1078 | fd.SetName("") 1079 | return fd.Publish(d) 1080 | 1081 | } 1082 | 1083 | func (dsk *DSKWrapper) AppleDOSDeleteFile(name string) error { 1084 | 1085 | vtoc, err := dsk.AppleDOSGetVTOC() 1086 | if err != nil { 1087 | return err 1088 | } 1089 | 1090 | // We cheat here a bit and use the get first free entry call with 1091 | // autogrow turned off. 1092 | fd, err := dsk.AppleDOSNamedCatalogEntry(name) 1093 | if err != nil { 1094 | return err 1095 | } 1096 | 1097 | if fd.IsUnused() { 1098 | return errors.New("Not found") 1099 | } 1100 | 1101 | // At this stage we have a match so get blocks to remove 1102 | tsBlocks, e := dsk.AppleDOSGetTSListSectors(*fd, -1) 1103 | if e != nil { 1104 | return e 1105 | } 1106 | 1107 | dataBlocks, e := dsk.AppleDOSGetFileSectors(*fd, -1) 1108 | if e != nil { 1109 | return e 1110 | } 1111 | 1112 | for _, pair := range dataBlocks { 1113 | vtoc.SetTSFree(pair[0], pair[1], true) 1114 | } 1115 | 1116 | for _, pair := range tsBlocks { 1117 | vtoc.SetTSFree(pair[0], pair[1], true) 1118 | } 1119 | 1120 | err = vtoc.Publish(dsk) 1121 | if err != nil { 1122 | return err 1123 | } 1124 | 1125 | fd.Data[0x00] = 0xff 1126 | fd.SetName("") 1127 | return fd.Publish(dsk) 1128 | 1129 | } 1130 | 1131 | func (dsk *DSKWrapper) AppleDOSSetLocked(name string, lock bool) error { 1132 | 1133 | // We cheat here a bit and use the get first free entry call with 1134 | // autogrow turned off. 
1135 | fd, err := dsk.AppleDOSNamedCatalogEntry(name) 1136 | if err != nil { 1137 | return err 1138 | } 1139 | 1140 | if fd.IsUnused() { 1141 | return errors.New("Not found") 1142 | } 1143 | 1144 | fd.SetLocked(lock) 1145 | return fd.Publish(dsk) 1146 | 1147 | } 1148 | 1149 | func (dsk *DSKWrapper) AppleDOSRenameFile(name, newname string) error { 1150 | 1151 | fd, err := dsk.AppleDOSNamedCatalogEntry(name) 1152 | if err != nil { 1153 | return err 1154 | } 1155 | 1156 | _, err = dsk.AppleDOSNamedCatalogEntry(newname) 1157 | if err == nil { 1158 | return errors.New("New name already exists") 1159 | } 1160 | 1161 | // can rename here 1162 | fd.SetName(newname) 1163 | return fd.Publish(dsk) 1164 | 1165 | } 1166 | --------------------------------------------------------------------------------