├── .gitignore ├── README.md ├── cmd ├── config.go ├── root.go ├── scrape.go └── version.go ├── main.go ├── model ├── decor.go └── meta.go ├── selector ├── selector.go └── site.go ├── site_test.go ├── sites ├── JavLib.go ├── ave.go ├── carib.go ├── caribpr.go ├── dmm.go ├── fantia.go ├── fc2.go ├── fc2club.go ├── getchu.go ├── heyzo.go ├── jav.go ├── mgs.go ├── pondo.go └── tokyo.go └── util ├── files.go ├── json.go └── medias.go /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | tidy 3 | config.yaml 4 | experiment -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tidy 2 | 3 | Tidy can get metadata from site by ID, and organize your files by expected structure. It's still under development. 4 | 5 | ### Install 6 | 7 | ``` 8 | go get -u github.com/ruriio/tidy 9 | ``` 10 | 11 | ### Usage 12 | 13 | ``` 14 | tidy dmm . 15 | ``` 16 | 17 | ### Supported sites 18 | 19 | - [x] dmm 20 | - [x] fc2, fc2club 21 | - [x] mgs 22 | - [x] ave 23 | - [x] carib, caribpr 24 | - [x] fatia 25 | - [x] getchu 26 | - [x] heyzo 27 | - [x] 1pondo 28 | - [x] tokyo 29 | -------------------------------------------------------------------------------- /cmd/config.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "github.com/mitchellh/go-homedir" 6 | "github.com/spf13/viper" 7 | "os" 8 | ) 9 | 10 | func initConfigs() { 11 | viper.AutomaticEnv() 12 | 13 | if configFile != "" { 14 | viper.SetConfigFile(configFile) 15 | } else { 16 | home, err := homedir.Dir() 17 | 18 | if err != nil { 19 | er(err) 20 | } 21 | 22 | viper.AddConfigPath(home) 23 | viper.SetConfigName("config") 24 | viper.SetConfigType("yaml") 25 | viper.AddConfigPath(".") 26 | viper.AddConfigPath("$HOME/.tidy") 27 | viper.AddConfigPath("/etc/tidy/") 28 | } 29 | 30 | err := viper.ReadInConfig() 31 | if err != nil { 32 | fmt.Printf("Fatal error config file: %s \n", err) 33 | } else { 34 | fmt.Println("Using config file:", viper.ConfigFileUsed()) 35 | } 36 | } 37 | 38 | func er(msg interface{}) { 39 | fmt.Println("Error:", msg) 40 | os.Exit(1) 41 | } 42 | -------------------------------------------------------------------------------- /cmd/root.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "github.com/spf13/cobra" 5 | ) 6 | 7 | var ( 8 | configFile string 9 | 10 | rootCmd = &cobra.Command{ 11 | Use: "tidy", 12 | Short: "Tidy is a helper to make your things tidy", 13 | Long: `Tidy is a helper to make your things tidy.`, 14 | } 15 | ) 16 | 17 | func Execute() error { 18 | addConfigs() 19 | addFlags() 20 | addCommands() 21 | 22 | return rootCmd.Execute() 23 | } 24 | 25 | func addConfigs() { 26 | cobra.OnInitialize(initConfigs) 27 | } 28 | 29 | func addCommands() { 30 | rootCmd.AddCommand(versionCmd) 31 | rootCmd.AddCommand(scrapeCmd) 32 | } 33 | 34 | func addFlags() { 35 | rootCmd.PersistentFlags().StringVarP(&configFile, "config", "c", "", 36 | "config file (default is $HOME/.tidy/config.yaml)") 37 | } 38 | -------------------------------------------------------------------------------- /cmd/scrape.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | . "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | . "github.com/ruriio/tidy/sites" 7 | . "github.com/ruriio/tidy/util" 8 | . "github.com/spf13/cobra" 9 | "log" 10 | "path" 11 | "path/filepath" 12 | "reflect" 13 | "runtime" 14 | "strings" 15 | ) 16 | 17 | var siteMap = registerSites() 18 | 19 | var scrapeCmd = &Command{ 20 | Use: "scrape", 21 | Aliases: getAliases(), 22 | Short: "Scrape site meta info", 23 | Long: `Get site meta info`, 24 | Run: run, 25 | } 26 | 27 | var extensions = map[string]bool{ 28 | ".mp4": true, 29 | ".mkv": true, 30 | ".wmv": true, 31 | ".avi": true, 32 | } 33 | 34 | func run(cmd *Command, args []string) { 35 | if len(args) > 0 { 36 | siteId := cmd.CalledAs() 37 | id := args[0] 38 | if id == "." { 39 | scrapeDir(siteId) 40 | } else { 41 | scrape(siteId, id) 42 | } 43 | } else { 44 | Println("Need at least 1 args.") 45 | } 46 | } 47 | 48 | func registerSites() map[string]func(string) Site { 49 | sites := make(map[string]func(string) Site) 50 | 51 | register(sites, Dmm) 52 | register(sites, Fc2) 53 | register(sites, Mgs) 54 | register(sites, Ave) 55 | register(sites, Tokyo) 56 | register(sites, Getchu) 57 | return sites 58 | } 59 | 60 | func register(sites map[string]func(string) Site, site func(string) Site) { 61 | key := strings.ToLower(getFuncName(site)) 62 | sites[key] = site 63 | } 64 | 65 | func getAliases() []string { 66 | var aliases []string 67 | for site := range siteMap { 68 | aliases = append(aliases, site) 69 | } 70 | return aliases 71 | } 72 | 73 | func getFuncName(i interface{}) string { 74 | name := runtime.FuncForPC(reflect.ValueOf(i).Pointer()).Name() 75 | parts := strings.Split(name, ".") 76 | return parts[len(parts)-1] 77 | } 78 | 79 | func isSiteDir(name string) bool { 80 | return siteMap[name] != nil 81 | } 82 | 83 | func scrapeDir(siteId string) { 84 | files, err := filepath.Glob("*") 85 | if err != nil { 86 | log.Fatal(err) 87 | } 88 | 89 | for _, file := range files { 90 | 91 | // ignore site dir 92 | if isSiteDir(file) { 93 | continue 94 | } 95 | 96 | ext := strings.ToLower(filepath.Ext(file)) 97 | if IsDirectory(file) || extensions[ext] { 98 | scrape(siteId, file) 99 | } 100 | } 101 | } 102 | 103 | func scrape(siteId string, id string) { 104 | site := siteMap[siteId](id) 105 | meta := site.Meta() 106 | 107 | if len(meta.Title) == 0 { 108 | return 109 | } 110 | 111 | dir := Sprintf("%v", meta.Extras["path"]) 112 | dir = Move(id, dir) 113 | file := path.Join(dir, "meta.json") 114 | Write(file, meta.Byte()) 115 | DownloadMedias(dir, meta.Poster, meta.Sample, meta.Images) 116 | } 117 | -------------------------------------------------------------------------------- /cmd/version.go: -------------------------------------------------------------------------------- 1 | package cmd 2 | 3 | import ( 4 | "fmt" 5 | "github.com/spf13/cobra" 6 | ) 7 | 8 | var versionCmd = &cobra.Command{ 9 | Use: "version", 10 | Short: "Print the version number of Tidy", 11 | Long: `All software has versions. This is Tidy's`, 12 | Run: func(cmd *cobra.Command, args []string) { 13 | fmt.Println("Tidy v1.0.0") 14 | }, 15 | } 16 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "github.com/ruriio/tidy/cmd" 4 | 5 | func main() { 6 | cmd.Execute() 7 | } 8 | -------------------------------------------------------------------------------- /model/decor.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | type Decor interface { 4 | Decorate(meta *Meta) *Meta 5 | } 6 | -------------------------------------------------------------------------------- /model/meta.go: -------------------------------------------------------------------------------- 1 | package model 2 | 3 | import ( 4 | "github.com/ruriio/tidy/util" 5 | "log" 6 | ) 7 | 8 | type Meta struct { 9 | Id string `json:"id"` 10 | Title string `json:"title"` 11 | Actor string `json:"actor"` 12 | Producer string `json:"producer,omitempty"` 13 | Series string `json:"series"` 14 | Release string `json:"release"` 15 | Duration string `json:"duration"` 16 | Sample string `json:"sample"` 17 | Poster string `json:"poster"` 18 | Images []string `json:"images"` 19 | Label string `json:"label"` 20 | Genre []string `json:"genre"` 21 | Url string `json:"url"` 22 | Extras map[string]interface{} `json:"extras"` 23 | } 24 | 25 | func (meta Meta) Json() string { 26 | return string(meta.Byte()) 27 | } 28 | 29 | func (meta Meta) Byte() []byte { 30 | out, err := util.JSONMarshal(meta) 31 | if err != nil { 32 | log.Panic(err) 33 | } 34 | return out 35 | } 36 | -------------------------------------------------------------------------------- /selector/selector.go: -------------------------------------------------------------------------------- 1 | package selector 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/PuerkitoBio/goquery" 7 | "log" 8 | "reflect" 9 | "regexp" 10 | "strings" 11 | ) 12 | 13 | type Selector struct { 14 | Id *Item 15 | Title *Item 16 | Actor *Item 17 | Poster *Item 18 | Series *Item 19 | Producer *Item 20 | Release *Item 21 | Duration *Item 22 | Sample *Item 23 | Images *Item 24 | Label *Item 25 | Genre *Item 26 | Extras map[string]*Item 27 | } 28 | 29 | type Item struct { 30 | selector string 31 | attribute string 32 | replacer *strings.Replacer 33 | preset string 34 | format string 35 | presets []string 36 | matcher string 37 | query string 38 | Plural bool 39 | } 40 | 41 | func Select(selector string) *Item { 42 | return &Item{selector: selector, attribute: "", replacer: strings.NewReplacer("", ""), preset: ""} 43 | } 44 | 45 | func Selects(selector string) *Item { 46 | return &Item{selector: selector, replacer: strings.NewReplacer("", ""), Plural: true} 47 | } 48 | 49 | func Preset(preset string) *Item { 50 | return &Item{preset: preset} 51 | } 52 | 53 | func Presets(presets []string) *Item { 54 | return &Item{presets: presets} 55 | } 56 | 57 | func Match(matcher string) *Item { 58 | return &Item{matcher: matcher} 59 | } 60 | 61 | func Query(query string) *Item { 62 | return &Item{query: query} 63 | } 64 | 65 | func (selector Item) Replace(oldNew ...string) *Item { 66 | selector.replacer = strings.NewReplacer(oldNew...) 67 | return &selector 68 | } 69 | 70 | func (selector Item) Format(format string) *Item { 71 | selector.format = format 72 | return &selector 73 | } 74 | 75 | func (selector Item) Attribute(attr string) *Item { 76 | selector.attribute = attr 77 | return &selector 78 | } 79 | 80 | func (selector Item) Text(doc *goquery.Document) string { 81 | text := doc.Find(selector.selector).First().Text() 82 | return strings.TrimSpace(selector.replacer.Replace(text)) 83 | } 84 | 85 | func (selector Item) Texts(doc *goquery.Document) []string { 86 | var texts []string 87 | doc.Find(selector.selector).Each(func(i int, selection *goquery.Selection) { 88 | text := selection.Text() 89 | text = strings.TrimSpace(selector.replacer.Replace(text)) 90 | texts = append(texts, text) 91 | }) 92 | 93 | return texts 94 | } 95 | 96 | func (selector *Item) Value(doc *goquery.Document) string { 97 | 98 | if selector == nil || doc == nil { 99 | return "" 100 | } 101 | 102 | if len(selector.preset) > 0 { 103 | return selector.preset 104 | } 105 | 106 | if len(selector.matcher) > 0 { 107 | return selector.matcherValue(doc) 108 | } 109 | 110 | selection := doc.Find(selector.selector).First() 111 | 112 | value := selector.textOrAttr(selection) 113 | 114 | if len(selector.format) > 0 { 115 | value = fmt.Sprintf(selector.format, value) 116 | } 117 | return value 118 | } 119 | 120 | func (selector Item) matcherValue(doc *goquery.Document) string { 121 | text := doc.Text() 122 | return selector.matcherText(text) 123 | } 124 | 125 | func (selector Item) matcherText(text string) string { 126 | re := regexp.MustCompile(selector.matcher) 127 | 128 | matches := re.FindAllString(text, -1) 129 | if len(matches) > 0 { 130 | text = matches[0] 131 | } else { 132 | text = "" 133 | } 134 | if selector.replacer != nil { 135 | text = selector.replacer.Replace(strings.TrimSpace(text)) 136 | } 137 | return strings.TrimSpace(text) 138 | } 139 | 140 | func (selector *Item) Values(doc *goquery.Document) []string { 141 | var texts []string 142 | 143 | if selector == nil || doc == nil { 144 | return texts 145 | } 146 | 147 | if selector.presets != nil { 148 | return selector.presets 149 | } 150 | 151 | doc.Find(selector.selector).Each(func(i int, selection *goquery.Selection) { 152 | texts = append(texts, selector.textOrAttr(selection)) 153 | }) 154 | 155 | return texts 156 | } 157 | 158 | func (selector Item) textOrAttr(selection *goquery.Selection) string { 159 | text := "" 160 | if len(selector.attribute) > 0 { 161 | src, exist := selection.Attr(selector.attribute) 162 | if exist { 163 | text = src 164 | } 165 | } else { 166 | text = selection.Text() 167 | } 168 | 169 | value := strings.TrimSpace(selector.replacer.Replace(text)) 170 | 171 | if len(selector.format) > 0 { 172 | value = fmt.Sprintf(selector.format, value) 173 | } 174 | 175 | return value 176 | } 177 | 178 | func (selector Item) Image(doc *goquery.Document) string { 179 | return selector.Attr(doc, "src") 180 | } 181 | 182 | func (selector Item) Images(doc *goquery.Document) []string { 183 | return selector.Attrs(doc, "src") 184 | } 185 | 186 | func (selector Item) Link(doc *goquery.Document) string { 187 | return selector.Attr(doc, "href") 188 | } 189 | 190 | func (selector Item) Links(doc *goquery.Document) []string { 191 | return selector.Attrs(doc, "href") 192 | } 193 | 194 | func (selector Item) Attr(doc *goquery.Document, attr string) string { 195 | src, exist := doc.Find(selector.selector).First().Attr(attr) 196 | if exist { 197 | return selector.replacer.Replace(strings.TrimSpace(src)) 198 | } 199 | return "" 200 | } 201 | 202 | func (selector Item) Attrs(doc *goquery.Document, attr string) []string { 203 | var attrs []string 204 | doc.Find(selector.selector).Each(func(i int, selection *goquery.Selection) { 205 | src, exist := selection.Attr(attr) 206 | if exist { 207 | text := strings.TrimSpace(src) 208 | text = selector.replacer.Replace(text) 209 | attrs = append(attrs, text) 210 | } 211 | }) 212 | return attrs 213 | } 214 | 215 | func (selector *Item) Query(data map[string]interface{}) string { 216 | if selector == nil { 217 | return "" 218 | } 219 | if len(selector.preset) > 0 { 220 | return selector.preset 221 | } 222 | return query(data, selector.query) 223 | } 224 | 225 | func query(data map[string]interface{}, key string) string { 226 | value := data[key] 227 | 228 | if value != nil { 229 | return fmt.Sprintf("%v", value) 230 | } 231 | return "" 232 | } 233 | 234 | func (selector *Item) Queries(data map[string]interface{}) []string { 235 | 236 | if selector == nil { 237 | return []string{} 238 | } 239 | 240 | if selector.presets != nil { 241 | return selector.presets 242 | } 243 | 244 | return queries(data, selector.query) 245 | } 246 | 247 | func queries(data map[string]interface{}, key string) []string { 248 | var res []string 249 | x := data[key] 250 | if x != nil { 251 | // if json object is not slice then ignore 252 | if reflect.ValueOf(x).Kind() == reflect.Slice { 253 | array := x.([]interface{}) 254 | for _, v := range array { 255 | var value string 256 | 257 | if reflect.ValueOf(v).Kind() == reflect.Map { 258 | out, err := json.Marshal(v) 259 | if err != nil { 260 | log.Fatal(err) 261 | } 262 | value = string(out) 263 | } else { 264 | value = fmt.Sprintf("%v", v) 265 | } 266 | res = append(res, value) 267 | } 268 | } 269 | } 270 | return res 271 | } 272 | 273 | func (selectors Selector) Extra(key string, selector *Item) Selector { 274 | if selectors.Extras == nil { 275 | selectors.Extras = make(map[string]*Item) 276 | } 277 | selectors.Extras[key] = selector 278 | return selectors 279 | } 280 | -------------------------------------------------------------------------------- /selector/site.go: -------------------------------------------------------------------------------- 1 | package selector 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "errors" 7 | "github.com/PuerkitoBio/goquery" 8 | . "github.com/ruriio/tidy/model" 9 | "golang.org/x/net/html/charset" 10 | "io" 11 | "io/ioutil" 12 | "log" 13 | "net/http" 14 | "strings" 15 | ) 16 | 17 | const UserAgent string = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) " + 18 | "Chrome/75.0.3770.90 Safari/537.36" 19 | 20 | const MobileUserAgent string = "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) " + 21 | "AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1" 22 | 23 | type Site struct { 24 | Key string 25 | Url string 26 | WebUrl string 27 | UserAgent string 28 | Charset string 29 | Cookies []http.Cookie 30 | Path string 31 | 32 | Selector 33 | 34 | Json bool 35 | JsonData interface{} 36 | 37 | Decor Decor 38 | 39 | meta Meta 40 | Next *Site 41 | Search *Site // if directly parse failed, get meta url from search result. 42 | } 43 | 44 | func (site Site) Decorate(meta *Meta) *Meta { 45 | return &site.meta 46 | } 47 | 48 | func (site *Site) Meta() Meta { 49 | site.meta = Meta{} 50 | if site.Json { 51 | site.meta = site.parseJson() 52 | } else { 53 | site.meta = site.parseHtml() 54 | } 55 | 56 | if site.meta.Extras == nil { 57 | site.meta.Extras = make(map[string]interface{}) 58 | } 59 | 60 | if len(site.Path) > 0 { 61 | site.meta.Extras["path"] = site.path(site.meta) 62 | } 63 | 64 | if len(site.WebUrl) > 0 { 65 | site.meta.Url = site.WebUrl 66 | } else { 67 | site.meta.Url = site.Url 68 | } 69 | 70 | if site.Next != nil { 71 | if len(site.Next.Url) > 0 { 72 | site.meta.Extras["nextUrl"] = site.Next.Url 73 | } 74 | } 75 | 76 | if site.Decor != nil { 77 | return *site.Decor.Decorate(&site.meta) 78 | } else { 79 | return site.meta 80 | } 81 | } 82 | 83 | func (site *Site) parseJson() Meta { 84 | var meta = Meta{} 85 | html, err := site.Body() 86 | 87 | if err != nil { 88 | log.Println(err) 89 | } 90 | 91 | body, err := ioutil.ReadAll(html) 92 | err = json.Unmarshal(body, &site.JsonData) 93 | if err != nil { 94 | log.Println(err) 95 | } 96 | 97 | data := make(map[string]interface{}) 98 | m, ok := site.JsonData.(map[string]interface{}) 99 | if ok { 100 | for k, v := range m { 101 | //fmt.Println(k, "=>", v) 102 | data[k] = v 103 | } 104 | } 105 | 106 | next := Meta{} 107 | if site.Next != nil { 108 | next = site.Next.Meta() 109 | } 110 | 111 | // extract meta data from json data 112 | meta.Title = oneOf(site.Title.Query(data), next.Title) 113 | meta.Actor = oneOf(site.Actor.Query(data), next.Actor) 114 | meta.Poster = oneOf(site.Poster.Query(data), next.Poster) 115 | meta.Producer = oneOf(site.Producer.Query(data), next.Producer) 116 | meta.Sample = oneOf(site.Sample.Query(data), next.Sample) 117 | meta.Series = oneOf(site.Series.Query(data), next.Series) 118 | meta.Release = oneOf(site.Release.Query(data), next.Release) 119 | meta.Duration = oneOf(site.Duration.Query(data), next.Duration) 120 | meta.Id = oneOf(site.Id.Query(data), next.Id) 121 | meta.Label = oneOf(site.Label.Query(data), next.Label) 122 | meta.Genre = oneOfArray(site.Genre.Queries(data), next.Genre) 123 | meta.Images = oneOfArray(site.Images.Queries(data), next.Images) 124 | 125 | return meta 126 | } 127 | 128 | func (site *Site) parseHtml() Meta { 129 | var meta = Meta{} 130 | var doc *goquery.Document 131 | body, err := site.Body() 132 | 133 | if err != nil { 134 | log.Println(err) 135 | } else { 136 | // load the HTML document 137 | doc, err = goquery.NewDocumentFromReader(body) 138 | 139 | if err != nil { 140 | log.Println(err) 141 | } 142 | } 143 | 144 | var next = Meta{} 145 | if site.Next != nil { 146 | next = site.Next.Meta() 147 | } 148 | 149 | // extract meta data from web page 150 | meta.Title = oneOf(site.Title.Value(doc), next.Title) 151 | meta.Actor = oneOf(site.Actor.Value(doc), next.Actor) 152 | meta.Poster = oneOf(site.Poster.Value(doc), next.Poster) 153 | meta.Producer = oneOf(site.Producer.Value(doc), next.Producer) 154 | meta.Sample = oneOf(site.Sample.Value(doc), next.Sample) 155 | meta.Series = oneOf(site.Series.Value(doc), next.Series) 156 | meta.Release = oneOf(site.Release.Value(doc), next.Release) 157 | meta.Duration = oneOf(site.Duration.Value(doc), next.Duration) 158 | meta.Id = oneOf(site.Id.Value(doc), next.Id) 159 | meta.Label = oneOf(site.Label.Value(doc), next.Label) 160 | meta.Genre = oneOfArray(site.Genre.Values(doc), next.Genre) 161 | meta.Images = oneOfArray(site.Images.Values(doc), next.Images) 162 | 163 | if meta.Id == "" && site.Next != nil { 164 | meta.Id = oneOf(site.Key, site.Next.Key) 165 | } 166 | 167 | // extract extras to meta 168 | if site.Extras != nil { 169 | meta.Extras = make(map[string]interface{}) 170 | for key, value := range site.Extras { 171 | if value.Plural { 172 | meta.Extras[key] = value.Values(doc) 173 | } else { 174 | meta.Extras[key] = value.Value(doc) 175 | } 176 | } 177 | 178 | for key, value := range next.Extras { 179 | meta.Extras[key] = value 180 | } 181 | } else { 182 | meta.Extras = next.Extras 183 | } 184 | 185 | return meta 186 | } 187 | 188 | func (site *Site) Body() (io.ReadCloser, error) { 189 | 190 | if site.Url == "" { 191 | return site.searchAndGet() 192 | } 193 | 194 | resp, err := site.get() 195 | 196 | if err != nil { 197 | log.Println(err) 198 | } 199 | //defer resp.Body.Close() 200 | 201 | if resp.StatusCode != 200 { 202 | log.Printf("stats code error: %d %s, using search\n", resp.StatusCode, resp.Status) 203 | return site.searchAndGet() 204 | } else { 205 | body := resp.Body 206 | 207 | //printHtmlBody(resp) 208 | 209 | // convert none utf-8 web page to utf-8 210 | if site.Charset != "" { 211 | body, err = decodeHTMLBody(resp.Body, site.Charset) 212 | if err != nil { 213 | log.Println(err) 214 | } 215 | } 216 | return body, nil 217 | } 218 | } 219 | 220 | func (site *Site) searchAndGet() (io.ReadCloser, error) { 221 | // get meta url from search result 222 | url := site.search() 223 | if len(url) > 0 { 224 | site.Url = url 225 | site.WebUrl = url 226 | return site.Body() 227 | } else { 228 | return nil, errors.New("No metadata found for " + site.Key) 229 | } 230 | } 231 | 232 | func (site *Site) get() (*http.Response, error) { 233 | log.Printf("get: %s", site.Url) 234 | client := &http.Client{} 235 | req, err := http.NewRequest("GET", site.Url, nil) 236 | if err != nil { 237 | log.Println(err) 238 | } 239 | req.Header.Set("User-Agent", site.UserAgent) 240 | 241 | for _, cookie := range site.Cookies { 242 | req.AddCookie(&cookie) 243 | } 244 | 245 | return client.Do(req) 246 | } 247 | 248 | func (site *Site) search() string { 249 | if site.Search == nil { 250 | return "" 251 | } 252 | 253 | body, err := site.Search.Body() 254 | if err != nil { 255 | log.Println(err) 256 | return "" 257 | } 258 | 259 | doc, err := goquery.NewDocumentFromReader(body) 260 | 261 | if err != nil { 262 | log.Println(err) 263 | return "" 264 | } 265 | 266 | hrefs := site.Search.Extras["search"].Values(doc) 267 | 268 | for _, href := range hrefs { 269 | matcher := site.Search.Extras["match"] 270 | if matcher != nil { 271 | if len(matcher.matcherText(href)) > 0 { 272 | log.Println("match search result: " + href) 273 | return href 274 | } 275 | } else if strings.Contains(href, site.Search.Key) { 276 | log.Println("find search result: " + href) 277 | return href 278 | } 279 | } 280 | 281 | return "" 282 | } 283 | 284 | func (site *Site) path(meta Meta) string { 285 | key := site.Key 286 | 287 | if len(key) == 0 { 288 | key = meta.Id 289 | } 290 | 291 | var replacer = strings.NewReplacer("$Title", meta.Title, "$Id", key, 292 | "$Actor", oneOf(meta.Actor, meta.Producer, meta.Series), 293 | "$Series", oneOf(meta.Series, meta.Producer, meta.Actor), 294 | "$Producer", oneOf(meta.Producer, meta.Series, meta.Actor)) 295 | path := replacer.Replace(site.Path) 296 | 297 | // fix for filename too long error 298 | if len(path) > 204 { 299 | path = string([]rune(path)[0:80]) 300 | } 301 | 302 | if !strings.HasSuffix(path, "/") { 303 | path += "/" 304 | } 305 | 306 | return path 307 | } 308 | 309 | func oneOf(str ...string) string { 310 | for _, s := range str { 311 | if len(s) > 0 { 312 | return s 313 | } 314 | } 315 | return str[0] 316 | } 317 | 318 | func oneOfArray(arr ...[]string) []string { 319 | for _, a := range arr { 320 | if len(a) > 0 { 321 | return a 322 | } 323 | } 324 | return arr[0] 325 | } 326 | 327 | func decodeHTMLBody(body io.Reader, encoding string) (io.ReadCloser, error) { 328 | 329 | body, err := charset.NewReaderLabel(encoding, body) 330 | 331 | if err != nil { 332 | log.Fatal(err) 333 | } 334 | 335 | return ioutil.NopCloser(body), nil 336 | } 337 | 338 | func detectContentCharset(body io.Reader) string { 339 | r := bufio.NewReader(body) 340 | if data, err := r.Peek(1024); err == nil { 341 | if _, name, ok := charset.DetermineEncoding(data, ""); ok { 342 | return name 343 | } 344 | } 345 | return "utf-8" 346 | } 347 | 348 | func printHtmlBody(resp *http.Response) { 349 | body, err := ioutil.ReadAll(resp.Body) 350 | 351 | if err != nil { 352 | log.Fatal(err) 353 | } 354 | 355 | log.Printf("body: %s", string(body)) 356 | } 357 | -------------------------------------------------------------------------------- /site_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/ruriio/tidy/sites" 6 | "testing" 7 | ) 8 | 9 | func TestScrape(t *testing.T) { 10 | //meta := Scrape(sites.Dmm("ssni678")) 11 | //meta := Scrape(sites.Fc2("1294320")) 12 | //meta := Scrape(sites.Fc2Club("437689")) 13 | //meta := Scrape(sites.Carib("030720-001")) 14 | //meta := Scrape(sites.CaribPr("022820_003")) 15 | //meta := Scrape(sites.Mgs("300MIUM-544")) 16 | //meta := Scrape(sites.Heyzo("2177")) 17 | //meta := Scrape(sites.Fantia("8209")) 18 | //meta := Scrape(sites.Getchu("19622")) 19 | //meta := Scrape(sites.Tokyo("n1236")) 20 | //site := sites.Pondo("052416_304") 21 | //site := sites.Jav("ONSD-804") 22 | site := sites.JavLib("ONSD-804") 23 | 24 | fmt.Println(site.Meta().Json()) 25 | } 26 | -------------------------------------------------------------------------------- /sites/JavLib.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | ) 7 | 8 | func JavLib(id string) Site { 9 | dmmId := parseDmmKey(id) 10 | url := fmt.Sprintf("http://www.javlibrary.com/cn/vl_searchbyid.php?keyword=%s", dmmId) 11 | return Site{ 12 | Key: parseDmmKey(id), 13 | Url: url, 14 | UserAgent: UserAgent, 15 | //Cookies: []http.Cookie{{Name: "__cfduid", Value: "d1050bbdb76517956e3ea66542aa5b7c81584260542"}, 16 | // {Name: "cf_clearance", Value: "01026f8f689772cc62faa3993d7ccb7a49c805c5-1585746249-0-150"}, 17 | // {Name: "over18", Value: "18"},}, 18 | 19 | Selector: Selector{ 20 | Title: Select("h3").Replace(dmmId, ""), 21 | Actor: Select("span.cast"), 22 | Poster: Select("#video_jacket_img").Attribute("src").Replace("//", "http://"), 23 | Producer: Select("#maker"), 24 | Sample: Select(".play-btn").Attribute("href"), 25 | Series: Select("a[href^=\"https://www.javbus.com/series\"]"), 26 | Release: Match(`\d{4}-\d{2}-\d{2}`), 27 | Duration: Select("#video_length").Replace("长度:\n\t", ""), 28 | Id: Select("div#video_id.item").Replace("识别码:\n\t", ""), 29 | Label: Select("a[href^=\"https://www.javbus.com/label\"]"), 30 | Genre: Select(".genre"), 31 | Images: Select("a.sample-box").Attribute("href"), 32 | }.Extra("actors", Selects("span.cast")), 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /sites/ave.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | "path" 7 | "regexp" 8 | "strings" 9 | ) 10 | 11 | func Ave(id string) Site { 12 | id = parseAveId(id) 13 | 14 | search := Site{ 15 | Url: fmt.Sprintf("https://www.aventertainments.com/search_Products.aspx?"+ 16 | "languageID=2&dept_id=29&keyword=%s&searchby=keyword", id), 17 | UserAgent: UserAgent, 18 | Selector: Selector{}.Extra("search", 19 | Select(".list-cover > a").Attribute("href")), 20 | } 21 | 22 | return Site{ 23 | Url: "", 24 | UserAgent: UserAgent, 25 | Search: &search, 26 | Path: "ave/$Actor/$Id $Title/", 27 | 28 | Selector: Selector{ 29 | Title: Select("h2"), 30 | Actor: Select("a[href^=\"https://www.aventertainments.com/ActressDetail\"]"), 31 | Poster: Select("img[src^=\"https://imgs.aventertainments.com/new/jacket_images\"]"). 32 | Attribute("src").Replace("jacket_images", "bigcover"), 33 | Producer: Select("a[href^=\"https://www.aventertainments.com/studio_products\"]"), 34 | Sample: Match(`https://.*.m3u8`), 35 | Series: Select("a[href^=\"https://www.aventertainments.com/Series\""), 36 | Release: Match(`\d{1,}/\d{1,}/\d{4}`), 37 | Duration: Match(`\d{2,} Min`), 38 | Id: Select(".top-title").Replace("商品番号: ", ""), 39 | Label: Select("null"), 40 | Genre: Select("ol > a[href^=\"https://www.aventertainments.com/subdept_product\"]"), 41 | Images: Select("img[src^=\"https://imgs.aventertainments.com/new/screen_shot\"], "+ 42 | "img[src^=\"https://imgs.aventertainments.com//vodimages/screenshot/\"]"). 43 | Attribute("src").Replace("small", "large"), 44 | }.Extra("actors", Selects("a[href^=\"https://www.aventertainments.com/ActressDetail\"]")), 45 | } 46 | } 47 | 48 | func parseAveId(key string) string { 49 | ext := path.Ext(key) 50 | name := strings.ToUpper(strings.TrimSuffix(key, ext)) 51 | re := regexp.MustCompile(`[A-Z]{2,}-? ?\d{2,}`) 52 | 53 | matches := re.FindAllString(name, -1) 54 | 55 | if len(matches) > 0 { 56 | return matches[0] 57 | } 58 | return "nil" 59 | } 60 | -------------------------------------------------------------------------------- /sites/carib.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | ) 7 | 8 | func Carib(id string) Site { 9 | return Site{ 10 | Url: fmt.Sprintf("https://www.caribbeancom.com/moviepages/%s/index.html", id), 11 | UserAgent: MobileUserAgent, 12 | Charset: "euc-jp", 13 | 14 | Selector: Selector{ 15 | Title: Select("h1[itemprop=name]"), 16 | Actor: Select("a[itemprop=actor]"), 17 | Poster: Preset(fmt.Sprintf("https://www.caribbeancom.com/moviepages/%s/images/l_l.jpg", id)), 18 | Producer: Preset("Caribbean"), 19 | Sample: Preset(fmt.Sprintf("https://smovie.caribbeancom.com/sample/movies/%s/480p.mp4", id)), 20 | Series: Select("a[onclick^=gaDetailEvent\\(\\'Series\\ Name\\']"), 21 | Release: Select("span[itemprop=datePublished]"), 22 | Duration: Select("span[itemprop=duration]"), 23 | Id: Preset(id), 24 | Label: Select("null"), 25 | Genre: Select("a[itemprop=genre]"), 26 | Images: Select("a[data-is_sample='1']").Attribute("href").Replace("/movie", "https://www.caribbeancom.com/movie"), 27 | }, 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /sites/caribpr.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | ) 7 | 8 | func CaribPr(id string) Site { 9 | return Site{ 10 | Url: fmt.Sprintf("https://www.caribbeancompr.com/moviepages/%s/index.html", id), 11 | UserAgent: MobileUserAgent, 12 | Charset: "euc-jp", 13 | 14 | Selector: Selector{ 15 | Title: Select("h1"), 16 | Actor: Select("a.spec-item[href^=\"/search/\"]"), 17 | Poster: Preset(fmt.Sprintf("https://www.caribbeancompr.com/moviepages/%s/images/l_l.jpg", id)), 18 | Producer: Preset("Caribbean"), 19 | Sample: Preset(fmt.Sprintf("https://smovie.caribbeancompr.com/sample/movies/%s/480p.mp4", id)), 20 | Series: Select("a[href^=\"/serieslist/\"]"), 21 | Release: Select("div.movie-info > div > ul > li:nth-child(2) > span.spec-content"), 22 | Duration: Select("div.movie-info > div > ul > li:nth-child(3) > span.spec-content"), 23 | Id: Preset(id), 24 | Label: Select("a[href^=\"/serieslist/\"]"), 25 | Genre: Select("a.spec-item[href^=\"/listpages/\"]"), 26 | Images: Select("a[data-is_sample='1']").Attribute("href"), 27 | }, 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /sites/dmm.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | "net/http" 7 | "regexp" 8 | "strings" 9 | ) 10 | 11 | func Dmm(id string) Site { 12 | dmmId := parseDmmId(id) 13 | next := Jav(id) 14 | search := Site{ 15 | Key: dmmId, 16 | Url: fmt.Sprintf("https://www.dmm.co.jp/search/=/searchstr=%s/", dmmId), 17 | UserAgent: MobileUserAgent, 18 | Selector: Selector{}. 19 | Extra("search", Select("a[href^=\"https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=\"]").Attribute("href")). 20 | Extra("match", Match(fmt.Sprintf("cid=[a-z_]{0,4}\\d{0,4}%s", dmmId))), 21 | } 22 | return Site{ 23 | Key: parseDmmKey(id), 24 | Url: fmt.Sprintf("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=%s/", dmmId), 25 | UserAgent: MobileUserAgent, 26 | Cookies: []http.Cookie{{Name: "age_check_done", Value: "1"}}, 27 | Path: "dmm/$Actor/$Id $Title/", 28 | Search: &search, 29 | Next: &next, 30 | 31 | Selector: Selector{ 32 | Title: Select("hgroup > h1").Replace("DVD", "", "Blu-ray", ""), 33 | Actor: Select("ul.parts-maindata > li > a > span"), 34 | Poster: Select(".package").Replace("ps.jpg", "pl.jpg").Attribute("src"), 35 | Producer: Select(".parts-subdata"), 36 | Sample: Select(".play-btn").Attribute("href"), 37 | Series: Select(".box-taglink > li > a[href^=\"/mono/dvd/-/list/=/article=series/\"]"), 38 | Release: Match(`\d{4}/\d{2}/\d{2}`), 39 | Duration: Match(`\d{2,}分`), 40 | Id: Select("品番 [a-z_\\d]*").Replace("品番 ", ""), 41 | Label: Select(".box-taglink > li > a[href^=\"/mono/dvd/-/list/=/article=label/\"]"), 42 | Genre: Select(".box-taglink > li > a[href^=\"/mono/dvd/-/list/=/article=keyword/\"]"), 43 | Images: Select("#sample-list > ul > li > a > span > img").Replace("-", "jp-").Attribute("src"), 44 | }.Extra("actors", Selects(".box-taglink > li > a[href^=\"/mono/dvd/-/list/=/article=actress/\"]")), 45 | } 46 | } 47 | 48 | func parseDmmKey(key string) string { 49 | name := strings.ToUpper(key) 50 | re := regexp.MustCompile(`[A-Z]{2,}-? ?\d{2,}`) 51 | 52 | matches := re.FindAllString(name, -1) 53 | 54 | if len(matches) > 0 { 55 | return matches[0] 56 | } 57 | return "nil" 58 | } 59 | 60 | func parseDmmId(key string) string { 61 | return strings.ReplaceAll(strings.ToLower(parseDmmKey(key)), "-", "") 62 | } 63 | -------------------------------------------------------------------------------- /sites/fantia.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | "net/http" 7 | ) 8 | 9 | func Fantia(id string) Site { 10 | 11 | return Site{ 12 | Url: fmt.Sprintf("https://fantia.jp/products/%s", id), 13 | UserAgent: UserAgent, 14 | Cookies: []http.Cookie{{Name: "_session_id", Value: "5602e9a9f48bba1997b07baca88e525f"}}, 15 | 16 | Selector: Selector{ 17 | Title: Select(".product-title"), 18 | Actor: Select("h3.fanclub-name"), 19 | Poster: Select("img[src^=\"https://c.fantia.jp/uploads/product/image\"]").Attribute("src"), 20 | Producer: Select("h3.fanclub-name"), 21 | Sample: Select("null"), 22 | Series: Select("h3.fanclub-name"), 23 | Id: Select("a.btn.btn-default.btn-sm.btn-star").Attribute("data-product_id"), 24 | Label: Select("null"), 25 | Genre: Select("null"), 26 | Images: Select("img[src^=\"https://c.fantia.jp/uploads/product_image\"]").Attribute("src"), 27 | }, 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /sites/fc2.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | "regexp" 7 | "strings" 8 | ) 9 | 10 | func Fc2(id string) Site { 11 | id = parseFc2Id(id) 12 | //next := Fc2Club(id) 13 | 14 | return Site{ 15 | Url: fmt.Sprintf("https://adult.contents.fc2.com/article/%s/", id), 16 | UserAgent: MobileUserAgent, 17 | Path: "fc2/$Actor/FC2-PPV-$Id $Title/", 18 | //Next: &next, 19 | 20 | Selector: Selector{ 21 | Title: Select(".items_article_MainitemNameTitle"), 22 | Actor: Select(".items_article_seller").Replace("by ", ""), 23 | Poster: Select("meta[property^=\"og:image\"]").Attribute("content"), 24 | Producer: Select(".items_article_seller").Replace("by ", ""), 25 | Sample: Select(".main-video").Attribute("src"), 26 | Series: Select(".items_article_seller").Replace("by ", ""), 27 | Release: Select(".items_article_Releasedate").Replace("販売日 : ", ""), 28 | Duration: Select(".items_article_MainitemThumb > p"), 29 | Id: Select(".items_article_priceHistory").Attribute("data-id"), 30 | Label: Select("null"), 31 | Genre: Select("null"), 32 | Images: Select("li[data-img^=\"https://storage\"]").Attribute("data-img"), 33 | }, 34 | } 35 | } 36 | 37 | func parseFc2Id(id string) string { 38 | id = strings.ToLower(id) 39 | if strings.HasPrefix(id, "fc2") { 40 | re := regexp.MustCompile(`\d{4,}`) 41 | matches := re.FindAllString(id, -1) 42 | if len(matches) > 0 { 43 | return matches[0] 44 | } 45 | } 46 | return "" 47 | } 48 | -------------------------------------------------------------------------------- /sites/fc2club.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | ) 7 | 8 | func Fc2Club(id string) Site { 9 | url := fmt.Sprintf("https://fc2club.com/html/FC2-%s.html", id) 10 | 11 | return Site{ 12 | Url: url, 13 | UserAgent: MobileUserAgent, 14 | Key: id, 15 | 16 | Selector: Selector{ 17 | Title: Select("div.col-sm-8 > h3").Replace(fmt.Sprintf("FC2-%s ", id), ""), 18 | Actor: Select("div.col-sm-8 > h5:nth-child(7) > a"), 19 | Poster: Select("div.col-sm-8 > a").Attribute("href").Replace("/upload", "https://fc2club.com/upload"), 20 | Producer: Select("div.col-sm-8 > h5:nth-child(5) > a:nth-child(2)"), 21 | Sample: Select("null"), 22 | Series: Select("div.col-sm-8 > h5:nth-child(5) > a:nth-child(2)"), 23 | Release: Select(".items_article_Releasedate").Replace("販売日 : ", ""), 24 | Duration: Select(".items_article_MainitemThumb > p"), 25 | Id: Select(".items_article_TagArea").Attribute("data-id"), 26 | Label: Select("null"), 27 | Genre: Select("null"), 28 | Images: Select("ul.slides > li > img").Attribute("src").Replace("/upload", "https://fc2club.com/upload"), 29 | }.Extra("url", Preset(url)), 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /sites/getchu.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | "golang.org/x/text/encoding/japanese" 7 | "net/http" 8 | "net/url" 9 | ) 10 | 11 | func Getchu(id string) Site { 12 | sStr, _ := japanese.EUCJP.NewEncoder().String("検索") 13 | keyStr, _ := japanese.EUCJP.NewEncoder().String(id) 14 | search := Site{ 15 | Key: id, 16 | Cookies: []http.Cookie{{Name: "adult_check_flag", Value: "1"}}, 17 | Charset: "euc-jp", 18 | Url: fmt.Sprintf("https://dl.getchu.com/search/search_list.php?search_keyword=%s"+ 19 | "&dojin=1&search_category_id=&action=search&btnWordSearch=%s", 20 | url.QueryEscape(keyStr), 21 | url.QueryEscape(sStr)), 22 | UserAgent: MobileUserAgent, 23 | Selector: Selector{}. 24 | Extra("search", Select("a[href^=\"https://dl.getchu.com/i/item\"]").Attribute("href")). 25 | Extra("match", Match(fmt.Sprintf("\\d{5,8}"))), 26 | } 27 | return Site{ 28 | Url: fmt.Sprintf("https://dl.getchu.com/i/item%s", id), 29 | UserAgent: MobileUserAgent, 30 | Cookies: []http.Cookie{{Name: "adult_check_flag", Value: "1"}}, 31 | Charset: "euc-jp", 32 | Path: "getchu/$Actor/ITEM-$Id $Title/", 33 | Search: &search, 34 | 35 | Selector: Selector{ 36 | Title: Select("meta[property=\"og:title\"]").Attribute("content").Replace("/", " "), 37 | Actor: Select("a[href^=\"https://dl.getchu.com/search/dojin_circle_detail.php\"]"), 38 | Poster: Select("meta[property=\"og:image\"]").Attribute("content"), 39 | Producer: Select("a[href^=\"https://dl.getchu.com/search/dojin_circle_detail.php\"]"), 40 | Sample: Select("a[href*=\".dl.getchu.com/download_sample_file.php\"]").Attribute("href"), 41 | Series: Select("a[href^=\"https://dl.getchu.com/search/dojin_circle_detail.php\"]"), 42 | Release: Match(`\d{4}/\d{2}/\d{2}`), 43 | Duration: Match(`動画.*分`), 44 | Id: Select("input[name=id]").Attribute("value"), 45 | Label: Select("null"), 46 | Genre: Select(".item-key > a"), 47 | Images: Select("a[href^=\"/data/item_img\"]").Attribute("href").Replace("/data", "http://dl.getchu.com/data"), 48 | }, 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /sites/heyzo.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | ) 7 | 8 | func Heyzo(id string) Site { 9 | const providerId = "provider_id" 10 | 11 | return Site{ 12 | Url: fmt.Sprintf("http://m.heyzo.com/moviepages/%s/index.html", id), 13 | UserAgent: MobileUserAgent, 14 | 15 | Selector: Selector{ 16 | Title: Select("h1"), 17 | Actor: Select("strong.name"), 18 | Poster: Select("#gallery > div > a > img").Attribute("src").Replace("gallery/thumbnail_001.jpg", "images/player_thumbnail.jpg"), 19 | Producer: Preset("HEYZO"), 20 | Sample: Select("#gallery > div > a > img").Attribute("src").Replace("gallery/thumbnail_001.jpg", "sample.mp4"), 21 | Series: Select("#series").Replace("シリーズ:", ""), 22 | Release: Select("span.release").Replace("配信日:", ""), 23 | Duration: Select("span[itemprop=duration]"), 24 | Id: Select("input[name=movie_id]").Attribute("value"), 25 | Label: Select("null"), 26 | Genre: Select("#keyword > ul > ul > li > a"), 27 | Images: Select("#gallery > div > a > img").Attribute("src").Replace("thumbnail_", ""), 28 | }.Extra(providerId, Select("input[name=provider_id]").Attribute("value")), 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /sites/jav.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | ) 7 | 8 | func Jav(id string) Site { 9 | dmmId := parseDmmKey(id) 10 | url := fmt.Sprintf("https://www.javbus.com/%s", dmmId) 11 | return Site{ 12 | Key: parseDmmKey(id), 13 | Url: url, 14 | UserAgent: UserAgent, 15 | 16 | Selector: Selector{ 17 | Title: Select("h3").Replace(dmmId, ""), 18 | Actor: Select(".star-name"), 19 | Poster: Select(".bigImage").Attribute("href").Replace("/pics", "https://www.javbus.com/pics"), 20 | Producer: Select("a[href^=\"https://www.javbus.com/studio/\"]"), 21 | Sample: Select(".play-btn").Attribute("href"), 22 | Series: Select("a[href^=\"https://www.javbus.com/series\"]"), 23 | Release: Match(`\d{4}-\d{2}-\d{2}`), 24 | Duration: Match(`\d{0,4}分鐘`), 25 | Id: Match(`識別碼: [A-Z]{0,6}-\d{0,6}`).Replace("識別碼: ", ""), 26 | Label: Select("a[href^=\"https://www.javbus.com/label\"]"), 27 | Genre: Select(".genre > a[href^=\"https://www.javbus.com/genre\"]"), 28 | Images: Select("a.sample-box").Attribute("href"), 29 | }.Extra("actors", Selects(".star-name")), 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /sites/mgs.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | "net/http" 7 | "path" 8 | "strings" 9 | ) 10 | 11 | func Mgs(id string) Site { 12 | 13 | id = parseMgsKey(id) 14 | 15 | return Site{ 16 | Url: fmt.Sprintf("https://sp.mgstage.com/product/product_detail/SP-%s/", id), 17 | UserAgent: MobileUserAgent, 18 | Cookies: []http.Cookie{{Name: "adc", Value: "1"}}, 19 | Path: "mgs/$Series/$Id $Title/", 20 | 21 | Selector: Selector{ 22 | Id: Preset(id), 23 | Title: Select(".sample-image-wrap.h1 > img").Attribute("alt"), 24 | Actor: Select("a.actor"), 25 | Poster: Select(".sample-image-wrap.h1").Attribute("href"), 26 | Sample: Select("#sample-movie").Attribute("src"), 27 | Series: Select("a.series"), 28 | Label: Select("null"), 29 | Images: Select("a[class^=\"sample-image-wrap sample\"]").Attribute("href"), 30 | Duration: Match(`\d{2,}分`), 31 | Release: Match(`\d{4}/\d{2}/\d{2}`), 32 | Producer: Match(`メーカー\s.*\s`).Replace("メーカー", ""), 33 | Genre: Select(".info > dl > dd > a"), 34 | }, 35 | } 36 | } 37 | 38 | func parseMgsKey(key string) string { 39 | ext := path.Ext(key) 40 | return strings.ToUpper(strings.TrimSuffix(key, ext)) 41 | } 42 | -------------------------------------------------------------------------------- /sites/pondo.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | . "github.com/ruriio/tidy/model" 7 | . "github.com/ruriio/tidy/selector" 8 | "log" 9 | ) 10 | 11 | func Pondo(id string) Site { 12 | next := Site{ 13 | Url: fmt.Sprintf("https://www.1pondo.tv/dyn/dla/json/movie_gallery/%s.json", id), 14 | UserAgent: MobileUserAgent, 15 | Json: true, 16 | Decor: PondoDecor{}, 17 | 18 | Selector: Selector{ 19 | Images: Query("Rows"), 20 | }, 21 | } 22 | 23 | site := Site{ 24 | Url: fmt.Sprintf("https://www.1pondo.tv/dyn/phpauto/movie_details/movie_id/%s.json", id), 25 | WebUrl: fmt.Sprintf("https://www.1pondo.tv/movies/%s/", id), 26 | UserAgent: MobileUserAgent, 27 | Json: true, 28 | 29 | Selector: Selector{ 30 | Title: Query("Title"), 31 | Actor: Query("Actor"), 32 | Poster: Query("ThumbHigh"), 33 | Producer: Preset("1Pondo"), 34 | Sample: Preset(fmt.Sprintf("http://smovie.1pondo.tv/sample/movies/%s/1080p.mp4", id)), 35 | Series: Query("Series"), 36 | Release: Query("Release"), 37 | Duration: Query("Duration"), 38 | Id: Query("MovieID"), 39 | Label: Preset(""), 40 | Genre: Query("UCNAME"), 41 | }, 42 | Next: &next, 43 | } 44 | 45 | return site 46 | } 47 | 48 | type PondoDecor struct { 49 | Decor 50 | } 51 | 52 | type PondoImage struct { 53 | Img string 54 | Filename string 55 | Protected bool 56 | } 57 | 58 | func (decor PondoDecor) Decorate(meta *Meta) *Meta { 59 | origin := meta.Images 60 | 61 | if len(origin) > 0 { 62 | var images []string 63 | var pondo PondoImage 64 | for _, s := range origin { 65 | err := json.Unmarshal([]byte(s), &pondo) 66 | if err != nil { 67 | log.Fatal(err) 68 | } 69 | if !pondo.Protected { 70 | img := pondo.Img 71 | if len(img) == 0 { 72 | img = pondo.Filename 73 | } 74 | images = append(images, "https://www.1pondo.tv/dyn/dla/images/"+img) 75 | } 76 | } 77 | meta.Images = images 78 | } 79 | return meta 80 | } 81 | -------------------------------------------------------------------------------- /sites/tokyo.go: -------------------------------------------------------------------------------- 1 | package sites 2 | 3 | import ( 4 | "fmt" 5 | . "github.com/ruriio/tidy/selector" 6 | "path" 7 | "regexp" 8 | "strings" 9 | ) 10 | 11 | func Tokyo(id string) Site { 12 | id = parseTokyoKey(id) 13 | 14 | search := Site{ 15 | Url: fmt.Sprintf("https://my.tokyo-hot.com/product/?q=%s&lang=jp", id), 16 | UserAgent: MobileUserAgent, 17 | Selector: Selector{}.Extra("search", Select("a.rm").Attribute("href"). 18 | Format("https://my.tokyo-hot.com/%s?lang=jp")), 19 | } 20 | 21 | return Site{ 22 | Url: fmt.Sprintf("https://my.tokyo-hot.com/product/%s/?lang=jp", id), 23 | UserAgent: MobileUserAgent, 24 | Path: "tokyo/$Actor/$Id $Title/", 25 | Search: &search, 26 | 27 | Selector: Selector{ 28 | Title: Select(".pagetitle"), 29 | Actor: Select("div.infowrapper > dl > dd:nth-child(2) > a"), 30 | Poster: Select("video").Attribute("poster").Replace("410x231", "820x462"), 31 | Producer: Preset("Tokyo"), 32 | Sample: Select("source").Attribute("src"), 33 | Series: Select("div.infowrapper > dl > dd > a[href^=\"/product/?type=genre\"]"), 34 | Release: Match(`\d{4}/\d{2}/\d{2}`), 35 | Duration: Match(`\d{2}:\d{2}:\d{2}`), 36 | Id: Select("input[name=\"product_uid\"]").Attribute("value"), 37 | Label: Select("div.infowrapper > dl > dd > a[href^=\"/product/?type=vendor\"], div.infowrapper > dl > dd > a[href^=\"/product/?vendor\"]"), 38 | Genre: Select("div.infowrapper > dl > dd > a[href^=\"/product/?type=tag\"]"), 39 | Images: Select("a[rel=\"cap\"]").Attribute("href").Replace(" ", "%20"), 40 | }, 41 | } 42 | } 43 | 44 | func parseTokyoKey(key string) string { 45 | ext := path.Ext(key) 46 | name := strings.ToLower(strings.TrimSuffix(key, ext)) 47 | re := regexp.MustCompile(`n-? ?\d{2,}`) 48 | 49 | matches := re.FindAllString(name, -1) 50 | 51 | if len(matches) > 0 { 52 | return matches[0] 53 | } 54 | return "nil" 55 | } 56 | -------------------------------------------------------------------------------- /util/files.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "log" 7 | "os" 8 | "path" 9 | "strings" 10 | ) 11 | 12 | func mkdir(name string) { 13 | if !exists(name) { 14 | err := os.MkdirAll(name, os.ModePerm) 15 | check(err) 16 | } 17 | } 18 | 19 | func mkdirParent(dest string) { 20 | dir := path.Dir(path.Clean(dest)) 21 | err := os.MkdirAll(dir, 0777) 22 | check(err) 23 | } 24 | 25 | func Move(from string, to string) string { 26 | 27 | dest := to 28 | 29 | if !exists(from) { 30 | return to 31 | } 32 | 33 | if isFile(from) && strings.HasSuffix(to, "/") { 34 | name := path.Base(from) 35 | dest = to + name 36 | } 37 | 38 | if IsDirectory(from) && !exists(dest) { 39 | // rename dir name directly 40 | mkdirParent(dest) 41 | err := os.Rename(from, dest) 42 | check(err) 43 | return dest 44 | } else { 45 | if IsDirectory(from) { 46 | // prevent dir be moved to same name sub dir 47 | dest = strings.TrimSuffix(dest, "/") 48 | } 49 | 50 | dir := path.Dir(dest) 51 | mkdir(dir) 52 | 53 | name := path.Base(dest) 54 | file := path.Join(dir, name) 55 | 56 | var ext string 57 | 58 | if isFile(from) { 59 | ext = path.Ext(name) 60 | } 61 | 62 | base := strings.TrimSuffix(name, ext) 63 | 64 | count := 1 65 | for exists(file) { 66 | name = fmt.Sprintf("%s-%d%s", base, count, ext) 67 | file = path.Join(dir, name) 68 | count++ 69 | } 70 | 71 | err := os.Rename(from, file) 72 | check(err) 73 | 74 | if isFile(file) { 75 | return path.Dir(file) 76 | } else { 77 | return file 78 | } 79 | } 80 | } 81 | 82 | func Write(file string, data []byte) { 83 | dir := path.Dir(file) 84 | mkdir(dir) 85 | 86 | err := ioutil.WriteFile(file, data, 0777) 87 | check(err) 88 | } 89 | 90 | func check(err error) { 91 | if err != nil { 92 | log.Fatal(err) 93 | } 94 | } 95 | 96 | func exists(name string) bool { 97 | _, err := os.Stat(name) 98 | if err != nil { 99 | if os.IsNotExist(err) { 100 | return false 101 | } 102 | } 103 | return err == nil 104 | } 105 | 106 | func IsDirectory(path string) bool { 107 | fileInfo, err := os.Stat(path) 108 | if err != nil { 109 | return false 110 | } 111 | return fileInfo.IsDir() 112 | } 113 | 114 | func isFile(path string) bool { 115 | fi, err := os.Stat(path) 116 | if err != nil { 117 | return false 118 | } 119 | return fi.Mode().IsRegular() 120 | } 121 | -------------------------------------------------------------------------------- /util/json.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | ) 7 | 8 | func JSONMarshal(t interface{}) ([]byte, error) { 9 | buffer := &bytes.Buffer{} 10 | encoder := json.NewEncoder(buffer) 11 | encoder.SetEscapeHTML(false) 12 | encoder.SetIndent("", " ") 13 | err := encoder.Encode(t) 14 | return buffer.Bytes(), err 15 | } 16 | -------------------------------------------------------------------------------- /util/medias.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "crypto/tls" 5 | "fmt" 6 | "io" 7 | "log" 8 | "net/http" 9 | "os" 10 | "os/exec" 11 | "path" 12 | "strings" 13 | ) 14 | 15 | func DownloadMedias(dir string, poster string, sample string, images []string) { 16 | 17 | if len(poster) > 0 { 18 | download(path.Join(dir, "poster.jpg"), poster) 19 | } 20 | 21 | if len(sample) > 0 { 22 | if strings.HasSuffix(sample, ".m3u8") { 23 | downloadM3u8(path.Join(dir, "sample.mp4"), sample) 24 | } else { 25 | download(path.Join(dir, "sample.mp4"), sample) 26 | } 27 | } 28 | 29 | for i, url := range images { 30 | mkdir(path.Join(dir, "images")) 31 | download(path.Join(dir, "images", fmt.Sprintf("%d.jpg", i)), url) 32 | } 33 | 34 | } 35 | 36 | func download(filepath string, url string) { 37 | out, err := os.Create(filepath) 38 | if out != nil { 39 | defer out.Close() 40 | } 41 | 42 | if err != nil { 43 | log.Fatal(err) 44 | } 45 | 46 | tr := &http.Transport{ 47 | TLSClientConfig: &tls.Config{InsecureSkipVerify: true}, 48 | } 49 | client := &http.Client{Transport: tr} 50 | resp, err := client.Get(url) 51 | if resp != nil { 52 | defer resp.Body.Close() 53 | } 54 | 55 | if err != nil { 56 | log.Fatal(err) 57 | } 58 | 59 | io.Copy(out, resp.Body) 60 | } 61 | 62 | func downloadM3u8(filepath string, url string) { 63 | cmd := exec.Command("ffmpeg", "-i", url, "-bsf:a", "aac_adtstoasc", 64 | "-vcodec", "copy", "-c", "copy", "-crf", "50", filepath) 65 | //cmd.Stderr = os.Stderr 66 | //cmd.Stdout = os.Stdout 67 | 68 | log.Println(cmd.String()) 69 | 70 | err := cmd.Run() 71 | if err != nil { 72 | log.Println(err) 73 | } 74 | } 75 | --------------------------------------------------------------------------------