38 | );
39 | };
40 |
41 | export default TableEntry;
42 |
--------------------------------------------------------------------------------
/client/src/Icons.js:
--------------------------------------------------------------------------------
1 | import React from 'react';
2 | import SvgIcon from '@material-ui/core/SvgIcon';
3 |
4 | export const LinkIcon = () => {
5 | return (
6 |
7 |
8 |
9 | );
10 | };
11 |
12 | export const GitHubIcon = () => {
13 | return (
14 |
15 |
16 |
17 | );
18 | };
19 |
20 | export const LastFmIcon = () => {
21 | return (
22 |
23 |
24 |
25 | );
26 | };
27 |
28 | export const HelpIcon = () => {
29 | return (
30 |
31 |
32 |
33 | );
34 | };
--------------------------------------------------------------------------------
/tests/artistRules.test.js:
--------------------------------------------------------------------------------
1 | const rules = require('../client/src/rules');
2 |
// Positive cases: every pair below shares at least one artist and must be
// reported as a duplicate when the rule set is enabled (third argument true).
describe('Check if the isDuplicateArtist method successfully detects duplicates', () => {
  test('Check if an artist contained in a \'&\' separated list of two artists is a duplicate', () => {
    expect(rules.isDuplicateArtist('21 Savage', '21 Savage & Metro Boomin', true)).toBe(true);
  });
  test('Check if an artist contained in a \',\' separated list of two artists is a duplicate', () => {
    expect(rules.isDuplicateArtist('Silk City', 'Silk City, Dua Lipa', true)).toBe(true);
  });
  test('Check if an artist contained in two \'&\' separated lists of artists is a duplicate', () => {
    expect(rules.isDuplicateArtist('Calvin Harris & Alesso', 'Calvin Harris & Disciples', true)).toBe(true);
  });
  test('Check if an artist contained in one \'&\' and one \',\' separated lists of artists is a duplicate', () => {
    expect(rules.isDuplicateArtist('Calvin Harris & Disciples', 'Calvin Harris, Dua Lipa', true)).toBe(true);
  });
  test('Check if an artist contained in a list of 3 artists is a duplicate', () => {
    expect(rules.isDuplicateArtist('Ellie Goulding', 'Ellie Goulding, Diplo & Swae Lee', true)).toBe(true);
  });
  test('Check if an artist contained in two large lists is a duplicate', () => {
    expect(rules.isDuplicateArtist('Kanye West, Big Sean, Pusha T & 2 Chainz', 'Kanye West, Chief Keef, Pusha T, Big Sean & Jadakiss', true)).toBe(true);
  });
  test('Check if an artist contained in a list of two artists and a list of three artists is a duplicate', () => {
    expect(rules.isDuplicateArtist('Lil Baby & Gunna', 'Lil Baby, Gunna & Drake', true)).toBe(true);
  });
  // The artist's own name contains a comma, which must not be mistaken for a list separator.
  test('Check if an artist whose name contains a comma is a duplicate when listed with another artist', () => {
    expect(rules.isDuplicateArtist('Tyler, the Creator', 'Tyler, The Creator & A$AP Rocky', true)).toBe(true);
  });
});
29 |
// Negative cases: the names below look alike (shared prefix or first name) but
// belong to different artists, so they must not be reported as duplicates.
describe('Check if the isDuplicateArtist method successfully detects non duplicates', () => {
  test('Check if different artists with one matching the beginning of the other are not duplicates', () => {
    expect(rules.isDuplicateArtist('America', 'American Football', true)).toBe(false);
  });
  test('Check if different artists with the same first name are not duplicates', () => {
    expect(rules.isDuplicateArtist('Anthony Green', 'Anthony Naples', true)).toBe(false);
  });
  test('Check if an artist with only first name is not a duplicate of another artist with same first name and a last name', () => {
    expect(rules.isDuplicateArtist('Arthur', 'Arthur Brown', true)).toBe(false);
  });
  // Previously shared its title with the test above even though it covers a different case.
  test('Check if a single-word artist is not a duplicate of another single-word artist that merely starts with the same letters', () => {
    expect(rules.isDuplicateArtist('Desire', 'Desired', true)).toBe(false);
  });
});
44 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Split Scrobble Finder
2 | ---
3 |
4 | 
5 |
6 | ## Background
7 |
8 | * This web app allows you to scan your Last.fm profile for split scrobbles. Split scrobbles can arise when streaming services change the metadata of a track, album or artist, such as adding a feature tag to the track title. Switching between music streaming platforms can also create split entries in your Last.fm profile, given that metadata is generally not standardized among all platforms.
9 | * The client is built with ReactJS and depends on an Express server. The server is responsible for making calls to the Last.fm API and propagating the results back to the client, which executes the algorithm and displays its results.
10 |
11 | ## Usage
12 |
13 | * The app is live at https://split-scrobble-finder.herokuapp.com/.
14 | * Simply input your username and select either tracks, albums, or artists to scan your profile for split scrobbles.
15 | * There is an option to "use rules". Selecting this option will use a custom rule set I developed to help eliminate false split scrobbles. For example, "Human After All" and "Human After All - SebastiAn Remix" would not be detected as split scrobbles if the rule set is enabled. Otherwise, standard string similarity would be used, which would result in those two tracks being detected as a duplicate.
16 | * I recommend trying requests in both fashions. If you find that the rule set fails to detect true split scrobbles or falsely identifies split scrobbles, feel free to raise an issue here on GitHub or message me with the issue. The rule set algorithm can always be improved!
17 | * Note that request times can be significant, as each scan requires numerous expensive calls to the Last.fm API.
18 | * Once your results have been fetched, you can scroll through the detected split entries, where each entry provides a hyperlink to the specified track/album/artist in your library on the Last.fm site. You can then edit the scrobbles as desired on Last.fm.
19 | * You can also download your results in either JSON or CSV format, so you don't have to keep waiting for your results to be generated if you want to view your results again.
20 |
21 | ## Local Installation
22 |
23 | ### Prerequisites
24 |
25 | * Node
26 | * npm
27 |
28 | ### Instructions
29 |
30 | * To use this app locally, you must have your own Last.fm API credentials. See [here](https://www.last.fm/api/account/create) for more details.
31 | * Once you have an API key and a shared secret, update `credentials.json` with these values.
32 | * You must first build and launch the server. In the root directory, do the following:
33 | * Run `npm install` to install all server-side dependencies.
34 | * Run `npm start` to launch the server, which runs on port 3001 by default.
35 | * You can then build and launch the client. In the `client` directory, do the following:
36 | * Run `npm install` to install all client-side dependencies.
37 | * Run `npm start` to launch the React client, which runs on port 3000 by default. The `package.json` file included in the `client` directory proxies requests to the server running on port 3001.
38 |
39 | ### Algorithm Tests
40 |
41 | * There is a set of tests found in the `tests` directory that ensure the algorithm works for various tag differences I have found in my own library.
42 | * Simply run `npm run test` to run all of these tests.
43 | * The GitHub workflow is set up to rerun these tests each time a commit is pushed to master.
--------------------------------------------------------------------------------
/client/src/serviceWorker.js:
--------------------------------------------------------------------------------
1 | // This optional code is used to register a service worker.
2 | // register() is not called by default.
3 |
4 | // This lets the app load faster on subsequent visits in production, and gives
5 | // it offline capabilities. However, it also means that developers (and users)
6 | // will only see deployed updates on subsequent visits to a page, after all the
7 | // existing tabs open on the page have been closed, since previously cached
8 | // resources are updated in the background.
9 |
10 | // To learn more about the benefits of this model and instructions on how to
11 | // opt-in, read https://bit.ly/CRA-PWA
12 |
// True when the page is served from a loopback host: the literal name
// "localhost", the IPv6 loopback address [::1], or any IPv4 address in
// 127.0.0.0/8.
const isLocalhost =
  ['localhost', '[::1]'].includes(window.location.hostname) ||
  /^127(?:\.(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3}$/.test(
    window.location.hostname
  );
22 |
/**
 * Registers the service worker once the page has loaded.
 *
 * Does nothing outside production builds, when the browser lacks service
 * worker support, or when PUBLIC_URL resolves to a different origin than the
 * page (for example when assets are served from a CDN; see
 * https://github.com/facebook/create-react-app/issues/2374).
 *
 * @param {Object} [config] - optional callbacks forwarded to the registration helpers.
 */
export function register(config) {
  if (process.env.NODE_ENV !== 'production' || !('serviceWorker' in navigator)) {
    return;
  }

  // The URL constructor is available in all browsers that support SW.
  const publicUrl = new URL(process.env.PUBLIC_URL, window.location.href);
  if (publicUrl.origin !== window.location.origin) {
    return;
  }

  const onLoad = () => {
    const swUrl = `${process.env.PUBLIC_URL}/service-worker.js`;

    if (!isLocalhost) {
      // Not localhost: register the service worker directly.
      registerValidSW(swUrl, config);
      return;
    }

    // On localhost, first verify that a service worker still exists.
    checkValidServiceWorker(swUrl, config);

    // Extra logging for developers, pointing to the service worker/PWA docs.
    navigator.serviceWorker.ready.then(() => {
      console.log(
        'This web app is being served cache-first by a service ' +
          'worker. To learn more, visit https://bit.ly/CRA-PWA'
      );
    });
  };

  window.addEventListener('load', onLoad);
}
56 |
/**
 * Registers the service worker script at swUrl and wires up the optional
 * config.onUpdate / config.onSuccess callbacks, which fire when a newly
 * installed worker reaches the "installed" state.
 *
 * @param {string} swUrl - URL of the service worker script.
 * @param {Object} [config] - may carry onUpdate(registration) and onSuccess(registration).
 */
function registerValidSW(swUrl, config) {
  const reportInstalled = registration => {
    if (navigator.serviceWorker.controller) {
      // At this point, the updated precached content has been fetched,
      // but the previous service worker will still serve the older
      // content until all client tabs are closed.
      console.log(
        'New content is available and will be used when all ' +
          'tabs for this page are closed. See https://bit.ly/CRA-PWA.'
      );
      if (config && config.onUpdate) {
        config.onUpdate(registration);
      }
      return;
    }

    // No controller yet: everything has been precached for the first time.
    console.log('Content is cached for offline use.');
    if (config && config.onSuccess) {
      config.onSuccess(registration);
    }
  };

  const watchRegistration = registration => {
    registration.onupdatefound = () => {
      const worker = registration.installing;
      if (worker == null) {
        return;
      }
      worker.onstatechange = () => {
        if (worker.state === 'installed') {
          reportInstalled(registration);
        }
      };
    };
  };

  navigator.serviceWorker
    .register(swUrl)
    .then(watchRegistration)
    .catch(error => {
      console.error('Error during service worker registration:', error);
    });
}
100 |
/**
 * Fetches the service worker script to confirm it exists and is JavaScript.
 * If it is missing (404) or served with a non-JavaScript content type, the
 * current registration is unregistered and the page reloaded; otherwise the
 * worker is registered as usual. A failed fetch is treated as being offline.
 *
 * @param {string} swUrl - URL of the service worker script.
 * @param {Object} [config] - forwarded to registerValidSW.
 */
function checkValidServiceWorker(swUrl, config) {
  const requestInit = { headers: { 'Service-Worker': 'script' } };

  const handleResponse = response => {
    const contentType = response.headers.get('content-type');
    const isMissing = response.status === 404;
    const isNotJavaScript =
      contentType != null && contentType.indexOf('javascript') === -1;

    if (!isMissing && !isNotJavaScript) {
      // Service worker found. Proceed as normal.
      registerValidSW(swUrl, config);
      return;
    }

    // No service worker found. Probably a different app. Reload the page.
    navigator.serviceWorker.ready.then(registration => {
      registration.unregister().then(() => {
        window.location.reload();
      });
    });
  };

  fetch(swUrl, requestInit)
    .then(handleResponse)
    .catch(() => {
      console.log(
        'No internet connection found. App is running in offline mode.'
      );
    });
}
130 |
/**
 * Unregisters the active service worker, if the browser supports service
 * workers at all.
 *
 * The promise returned by registration.unregister() is chained so that a
 * failure there, or a rejected `ready` promise, is logged instead of turning
 * into an unhandled promise rejection.
 */
export function unregister() {
  if ('serviceWorker' in navigator) {
    navigator.serviceWorker.ready
      .then(registration => registration.unregister())
      .catch(error => {
        console.error(error.message);
      });
  }
}
138 |
--------------------------------------------------------------------------------
/server.js:
--------------------------------------------------------------------------------
1 | const express = require('express');
2 | const path = require('path');
3 | const request = require('request');
4 | const credentials = require('./credentials');
5 | const app = express();
6 |
7 | // Serve static files from the React app
8 | app.use(express.static(path.join(__dirname, 'client/build')));
9 |
// API routes, registered in order. Each entry is [path, validator, handler]:
// the validator rejects requests with missing query parameters before the
// handler calls the Last.fm API.
const apiRoutes = [
  ['/numtracks', validateNumReq, getNumTracks],
  ['/tracks', validateGetReq, getTracks],
  ['/numalbums', validateNumReq, getNumAlbums],
  ['/albums', validateGetReq, getAlbums],
  ['/numartists', validateNumReq, getNumArtists],
  ['/artists', validateGetReq, getArtists],
];

for (const [routePath, validator, handler] of apiRoutes) {
  app.get(routePath, validator, handler);
}
18 |
/**
 * Builds the options object handed to `request` for a Last.fm API call.
 *
 * @param {Object} params - query parameters forwarded to getURL.
 * @returns {{url: *, headers: Object}} the target URL plus a fixed User-Agent header.
 */
function getOptions(params) {
  const headers = { 'User-Agent': 'split-scrobble-finder' };
  const url = getURL(params);
  return { url, headers };
}
27 |
/**
 * Builds the Last.fm API URL for the given query parameters.
 *
 * Every entry of `params` is appended as a query parameter, followed by
 * `format=json` and the API key read from `credentials.API_KEY`.
 *
 * The HTTPS endpoint is used so that the API key and the requested username
 * are not sent in cleartext.
 *
 * @param {Object} params - query parameters such as method, user, limit, page.
 * @returns {URL} the fully populated request URL.
 */
function getURL(params) {
  const url = new URL('https://ws.audioscrobbler.com/2.0/');
  for (const [param, val] of Object.entries(params)) {
    url.searchParams.append(param, val);
  }
  url.searchParams.append('format', 'json');
  url.searchParams.append('api_key', credentials.API_KEY);
  return url;
}
37 |
/**
 * Express middleware for the count endpoints: passes the request on when a
 * `user` query parameter is present, otherwise answers 400.
 */
function validateNumReq(req, res, next) {
  if (!req.query.user) {
    res.status(400).json({error: 'Bad request - missing user parameter'});
    return;
  }
  next();
}
45 |
/**
 * Express middleware for the paged list endpoints: passes the request on only
 * when both the `user` and `pageNum` query parameters are present, otherwise
 * answers 400.
 */
function validateGetReq(req, res, next) {
  const { user, pageNum } = req.query;
  if (!user || !pageNum) {
    res.status(400).json({error: 'Bad request - missing parameter'});
    return;
  }
  next();
}
53 |
/**
 * Parses a Last.fm response body without throwing.
 *
 * @param {string} body - raw response body.
 * @returns {Object|null} the parsed object, or null when the body is not a JSON object.
 */
function parseLastFmBody(body) {
  try {
    const parsed = JSON.parse(body);
    return parsed !== null && typeof parsed === 'object' ? parsed : null;
  } catch (err) {
    // A malformed body (e.g. an HTML error page) must not crash the server.
    return null;
  }
}

/**
 * Performs a Last.fm API call and handles the failure paths shared by every
 * endpoint: transport errors answer 500, non-200 upstream statuses answer 400
 * with the upstream message when one is available.
 *
 * @param {Object} params - Last.fm query parameters.
 * @param {Object} res - Express response.
 * @param {string} handlerName - name used in the upstream-error log line.
 * @param {function(Object|null): void} onSuccess - receives the parsed body (null if unparseable).
 */
function queryLastFm(params, res, handlerName, onSuccess) {
  request(getOptions(params), function (error, response, body) {
    if (error) {
      console.log('Internal server error: ' + error);
      res.status(500).json({error: 'Internal error.'});
      return;
    }

    const data = parseLastFmBody(body);
    if (response.statusCode !== 200) {
      console.log('Last.fm API error in ' + handlerName);
      let errMessage = 'Last.fm error';
      if (data && data.message) {
        errMessage += ': ' + data.message;
      }
      res.status(400).json({error: errMessage});
      return;
    }

    onSuccess(data);
  });
}

/**
 * Sends the `@attr.total` count found under `data[key]`, or a 500 when the
 * expected structure is missing from the upstream payload.
 */
function sendTotal(res, data, key, label) {
  const section = data && data[key];
  const attr = section && section['@attr'];
  if (!attr || attr.total === undefined || attr.total === null) {
    console.log('Last.fm API error - no ' + label + ' total');
    res.status(500).json({error: 'Internal error.'});
    return;
  }
  // Always send a string: Express treats a numeric argument to send() as a status code.
  res.status(200).send(String(attr.total));
}

/**
 * Sends `data[key][itemKey]` mapped through `toResult`, or a 500 when the
 * `data[key]` section is missing from the upstream payload.
 */
function sendItems(res, data, key, itemKey, label, toResult) {
  const section = data && data[key];
  if (!section) {
    console.log('Last.fm API error - no ' + label);
    res.status(500).json({error: 'Internal error.'});
    return;
  }
  const raw = section[itemKey];
  // Defensive normalization: accept a lone object or a missing list as well as an array.
  let items = [];
  if (Array.isArray(raw)) {
    items = raw;
  } else if (raw) {
    items = [raw];
  }
  res.status(200).json(items.map(toResult));
}

/** Responds with the total number of top tracks for `req.query.user`. */
function getNumTracks(req, res) {
  queryLastFm({method: 'user.getTopTracks', user: req.query.user, limit: 1}, res, 'getNumTracks',
    data => sendTotal(res, data, 'toptracks', 'top tracks'));
}

/** Responds with one page (up to 1000) of the user's top tracks as {artist, name} objects. */
function getTracks(req, res) {
  queryLastFm({method: 'user.getTopTracks', user: req.query.user, limit: 1000, page: req.query.pageNum}, res, 'getTracks',
    data => sendItems(res, data, 'toptracks', 'track', 'top tracks',
      track => ({artist: track.artist.name, name: track.name})));
}

/** Responds with the total number of top albums for `req.query.user`. */
function getNumAlbums(req, res) {
  queryLastFm({method: 'user.getTopAlbums', user: req.query.user, limit: 1}, res, 'getNumAlbums',
    data => sendTotal(res, data, 'topalbums', 'top albums'));
}

/** Responds with one page (up to 1000) of the user's top albums as {artist, name} objects. */
function getAlbums(req, res) {
  queryLastFm({method: 'user.getTopAlbums', user: req.query.user, limit: 1000, page: req.query.pageNum}, res, 'getAlbums',
    data => sendItems(res, data, 'topalbums', 'album', 'top albums',
      album => ({artist: album.artist.name, name: album.name})));
}

/** Responds with the total number of top artists for `req.query.user`. */
function getNumArtists(req, res) {
  queryLastFm({method: 'user.getTopArtists', user: req.query.user, limit: 1}, res, 'getNumArtists',
    data => sendTotal(res, data, 'topartists', 'top artists'));
}

/** Responds with one page (up to 1000) of the user's top artists as a list of names. */
function getArtists(req, res) {
  queryLastFm({method: 'user.getTopArtists', user: req.query.user, limit: 1000, page: req.query.pageNum}, res, 'getArtists',
    data => sendItems(res, data, 'topartists', 'artist', 'top artists',
      artist => artist.name));
}
170 |
// Any GET that did not match a route registered earlier receives the React
// app's index.html from the client build directory.
app.get('/*', (req, res) => {
  const indexFile = path.join(__dirname, './client/build', 'index.html');
  res.sendFile(indexFile);
});

const port = process.env.PORT || 3001;

// Log from the listen callback so the message is only printed once the server
// is actually accepting connections, not before binding has been attempted.
app.listen(port, () => {
  console.log(`Server listening on ${port}...`);
});
--------------------------------------------------------------------------------
/tests/albumRules.test.js:
--------------------------------------------------------------------------------
1 | const rules = require('../client/src/rules');
2 |
// Positive cases: each [title, album1, album2] row names one tag difference
// that must still be recognized as the same album when the rule set is
// enabled (third argument true).
describe('Check if the isDuplicateAlbum method successfully detects duplicates', () => {
  const duplicateCases = [
    ['Check if a deluxe album is a duplicate', 'Take Care', 'Take Care (Deluxe)'],
    ['Check if an extended album is a duplicate', 'Pure Heroine (Extended)', 'Pure Heroine'],
    ['Check if a single with excess feature tag is a duplicate', 'One Out Of Two', 'One Out Of Two (feat. Irfane)'],
    ['Check if a single with features listed in different order is a duplicate', 'Watch (feat. Kanye West & Lil Uzi Vert)', 'Watch (feat. Lil Uzi Vert & Kanye West)'],
    ['Check if a single with \'single\' listed in the title is a duplicate', 'This Is America', 'This Is America - Single'],
    ['Check if an EP with \'EP\' listed in the title is a duplicate', 'Rogue Waves', 'Rogue Waves - EP'],
    ['Check if an album with extra nonalphanumeric characters is a duplicate', 'Born to Die (The Paradise Edition)', 'Born to Die - The Paradise Edition'],
    ['Check if a single with an excess feature tag listed as \'single\' is a duplicate', 'Drug Dealers Anonymous', 'Drug Dealers Anonymous (feat. JAY Z) - Single'],
    ['Check if an album with different punctuation and casing is a duplicate', 'E\u00b7MO\u00b7TION Side B', 'EMOTION SIDE B +'],
    ['Check if a tour edition of an album is a duplicate', 'In A Million Years', 'In A Million Years (Tour Edition)'],
    ['Check if album with extra whitespace is a duplicate', 'Funk Wav Bounces Vol. 1', 'Funk Wav Bounces Vol.1'],
    ['Check if the explicit version of an album is a duplicate', 'Man On The Moon II: The Legend Of Mr. Rager (Explicit Version)', 'Man On The Moon, Vol. II: The Legend Of Mr. Rager'],
    ['Check if an album missing \'vol\' is a duplicate', 'Man On The Moon, Vol. II: The Legend Of Mr. Rager', 'Man on the Moon II: The Legend of Mr. Rager'],
    ['Check if a deluxe version of an album is a duplicate', 'Sun Structures', 'Sun Structures (Deluxe Version)'],
    ['Check if an album using an extra ellipsis is a duplicate', 'Magna Carta Holy Grail', 'Magna Carta... Holy Grail'],
    ['Check if an album name with an extra article is a duplicate', 'Vol. 3: Life and Times of S. Carter', 'Vol. 3: The Life and Times of S. Carter'],
    ['Check if a deluxe edition of an album is a duplicate', 'Everything You\'ve Come To Expect (Deluxe Edition)', 'Everything You\u2019ve Come To Expect'],
    ['Check if an abbreviation without punctuation is a duplicate', 'Sept 5th', 'Sept. 5th'],
    ['Check if an album listed as bonus track is a duplicate', 'The Bird Of Music', 'The Bird Of Music (Bonus Track)'],
    ['Check if the expanded and deluxe edition of an album are duplicates', 'Hip Hop Is Dead (Deluxe Edition)', 'Hip Hop Is Dead (Expanded Edition)'],
    ['Check if a \/ with different spacing is a duplicate', 'Speakerboxxx \/ The Love Below', 'Speakerboxxx\/The Love Below'],
    ['Check if a special edition of an album is a duplicate', 'The Bones of What You Believe', 'The Bones of What You Believe (Special Edition)'],
    ['Check if an expanded edition marked in square brackets is a duplicate', 'Enter The Wu-Tang (36 Chambers) [Expanded Edition]', 'Enter the Wu-Tang (36 Chambers)'],
  ];

  for (const [title, album1, album2] of duplicateCases) {
    test(title, () => {
      expect(rules.isDuplicateAlbum(album1, album2, true)).toBe(true);
    });
  }
});
74 |
// Negative cases: each [title, album1, album2] row pairs two releases that
// look similar but are distinct, so the rule set must not report a duplicate.
describe('Check if the isDuplicateAlbum method successfully detects non duplicates', () => {
  const distinctCases = [
    ['Check if an album of instrumentals is not a duplicate', 'Cherry Bomb', 'Cherry Bomb + Instrumentals'],
    ['Check if the b sides for two different albums are not duplicates', 'Currents B-Sides & Remixes', 'InnerSpeaker B-Sides & Remixes'],
    ['Check if the solo version of a single is not a duplicate', 'Biking', 'Biking (Solo)'],
    ['Check if the b side of an album is not a duplicate', 'E\u00b7MO\u00b7TION', 'E\u00b7MO\u00b7TION Side B'],
    ['Check if a remix album is not a duplicate', 'Human After All', 'Human After All (Remixes)'],
    ['Check if different remixes of a single are not duplicates', 'Don\'t Leave Me Lonely (Claptone Remix)', 'Don\'t Leave Me Lonely (Purple Disco Machine Remix)'],
    ['Check if a remix of a single is not a duplicate', 'Late Night Feelings', 'Late Night Feelings (Channel Tres Remix)'],
    ['Check if the sequel to an album using numeric characters is not a duplicate', 'Luv Is Rage', 'Luv Is Rage 2'],
    ['Check if the sequel to an album using roman numerals is not a duplicate', 'Culture', 'Culture II'],
    ['Check if the sequel to an album using written words is not a duplicate', 'PARTYNEXTDOOR', 'PARTYNEXTDOOR TWO'],
    ['Check if two different volumes of an album using roman numerals are not duplicates', 'Superclean, Vol. I', 'Superclean, Vol. II'],
    ['Check if an edit of a single is not a duplicate', 'Better Now', 'Better Now (Edit)'],
    ['Check if two soundtracks by the same artist are not duplicates', 'Blade Runner 2049 (Original Motion Picture Soundtrack)', 'The Dark Knight (Collectors Edition) [Original Motion Picture Soundtrack]'],
    ['Check if single remixes by two different artists are not duplicates', 'Lightenup (Alex Metric Remix)', 'Lightenup (Breakbot Remix)'],
    ['Check if a remix EP is not a duplicate', 'For All We Know', 'For All We Know - The Remixes - EP'],
    ['Check if a live album is not a duplicate', 'Carrie & Lowell', 'Carrie & Lowell Live'],
    ['Check if a \"reloaded\" album is not a duplicate', '88GLAM RELOADED', '88GLAM2'],
    ['Check an album with \'ep\' in it without actually being an EP is not a duplicate', 'The Blueprint', 'The Blueprint 2: The Gift & the Curse'],
  ];

  for (const [title, album1, album2] of distinctCases) {
    test(title, () => {
      expect(rules.isDuplicateAlbum(album1, album2, true)).toBe(false);
    });
  }
});
131 |
--------------------------------------------------------------------------------
/client/src/rules.js:
--------------------------------------------------------------------------------
1 | import { compareTwoStrings } from 'string-similarity';
// Prefixes that introduce a featured-artist tag inside a title.
const featKeywords = ['feat. ', 'feat ', 'ft. ', 'ft ', 'with ', 'featuring '];
// Value of each (lowercase) Roman numeral symbol. 500 is written "D"; it was
// previously keyed as "f", which is not a Roman numeral symbol.
const romanNumVals = { m: 1000, d: 500, c: 100, l: 50, x: 10, v: 5, i: 1 };
// Keywords passed to isExempt: a pair of titles where only one contains a given keyword is never treated as a duplicate.
const exemptKeywords = ['remix', 'mix', 'instrumental', 'live', 'edit', 'alt', 'demo', 'version', 'a cappella', 'interlude', 'reprise', 'continued', 'single', 'acoustic'];
// Edition/format tags for album titles; presumably consumed by stripAlbumTag, which is outside this view.
const albumKeywords = ['deluxe', 'expanded', 'extended', 'single', ' ep', 'tour edition', 'explicit version', 'deluxe version', 'expanded version', 'extended version', 'deluxe edition', 'expanded edition', 'extended edition', 'bonus track', 'special edition'];
6 |
/**
 * Decides whether two track titles look like split scrobbles of one track.
 *
 * Without rules this is a plain similarity check (> 0.5) on the raw titles.
 * With rules, the lowercased titles must either be at least 0.5 similar or
 * have one start with the other; they must not be separated by an exempt
 * keyword; and the final verdict comes from isMatched.
 *
 * @param {string} track1
 * @param {string} track2
 * @param {boolean} useRules - whether to apply the custom rule set.
 * @returns {boolean} whether the pair is considered a duplicate.
 */
export function isDuplicateTrack(track1, track2, useRules) {
  if (!useRules) {
    return compareTwoStrings(track1, track2) > 0.5;
  }

  const first = track1.toLowerCase();
  const second = track2.toLowerCase();

  const tooDifferent = compareTwoStrings(first, second) < 0.5;
  const sharesPrefix = first.startsWith(second) || second.startsWith(first);
  if (tooDifferent && !sharesPrefix) {
    return false;
  }

  return isExempt(first, second, exemptKeywords) ? false : isMatched(first, second);
}
24 |
/**
 * Decides whether two album titles look like split scrobbles of one album.
 *
 * Without rules this is a plain similarity check (> 0.5) on the raw titles.
 * With rules, both titles are lowercased and passed through stripAlbumTag,
 * then go through the same similarity/prefix gate, exempt-keyword check and
 * isMatched comparison used for tracks.
 *
 * @param {string} album1
 * @param {string} album2
 * @param {boolean} useRules - whether to apply the custom rule set.
 * @returns {boolean} whether the pair is considered a duplicate.
 */
export function isDuplicateAlbum(album1, album2, useRules) {
  if (!useRules) {
    return compareTwoStrings(album1, album2) > 0.5;
  }

  const first = stripAlbumTag(album1.toLowerCase());
  const second = stripAlbumTag(album2.toLowerCase());

  const tooDifferent = compareTwoStrings(first, second) < 0.5;
  const sharesPrefix = first.startsWith(second) || second.startsWith(first);
  if (tooDifferent && !sharesPrefix) {
    return false;
  }

  return isExempt(first, second, exemptKeywords) ? false : isMatched(first, second);
}
46 |
/**
 * Decides whether two artist strings look like split scrobbles of one artist.
 *
 * Without rules this is a plain similarity check (> 0.5) on the raw names.
 * With rules, the lowercased names must either be at least 0.5 similar or
 * have one start with the other; the pair is then a duplicate when
 * analyzeArtistList accepts it, and otherwise isMatched decides.
 *
 * @param {string} artist1
 * @param {string} artist2
 * @param {boolean} useRules - whether to apply the custom rule set.
 * @returns {boolean} whether the pair is considered a duplicate.
 */
export function isDuplicateArtist(artist1, artist2, useRules) {
  if (!useRules) {
    return compareTwoStrings(artist1, artist2) > 0.5;
  }

  const first = artist1.toLowerCase();
  const second = artist2.toLowerCase();

  const tooDifferent = compareTwoStrings(first, second) < 0.5;
  const sharesPrefix = first.startsWith(second) || second.startsWith(first);
  if (tooDifferent && !sharesPrefix) {
    return false;
  }

  return analyzeArtistList(first, second) ? true : isMatched(first, second);
}
66 |
/**
 * Reports whether some keyword appears in exactly one of the two titles,
 * which marks the pair as distinct (e.g. an original and its remix).
 *
 * @param {string} track1
 * @param {string} track2
 * @param {string[]} exemptKeywords - keywords to look for.
 * @returns {boolean} true when at least one keyword is present in only one title.
 */
function isExempt(track1, track2, exemptKeywords) {
  return exemptKeywords.some(
    keyword => track1.includes(keyword) !== track2.includes(keyword)
  );
}
77 |
/**
 * Compares two lowercased titles after normalization.
 *
 * When both titles carry a feature tag, the featured artists must be at least
 * 0.5 similar, and whatever remains of each title once its feature text is
 * removed must pass analyzeFeatureTagExcess (when both remainders are
 * non-empty). After that, both titles are stripped of non-alphanumerics,
 * remastered tags, feature tags and excess whitespace, then compared word by
 * word.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {boolean} whether the two titles are considered the same.
 */
function isMatched(str1, str2) {
  const features1 = containsFeatureTag(str1) ? getFeaturedArtists(str1) : undefined;
  const features2 = containsFeatureTag(str2) ? getFeaturedArtists(str2) : undefined;

  if (features1 && features2) {
    if (compareTwoStrings(features1, features2) < 0.5) {
      return false;
    }

    // What is left of a title once its featured-artist text is removed.
    const leftover = (title, features) =>
      stripExcessWhitespace(stripNonAlphaNumeric(title.replace(features, '')));
    const leftover1 = leftover(str1, features1);
    const leftover2 = leftover(str2, features2);

    if (leftover1 && leftover2 && !analyzeFeatureTagExcess(leftover1, leftover2)) {
      return false;
    }
  }

  const normalize = title =>
    stripExcessWhitespace(stripFeatureTag(stripRemasteredTag(stripNonAlphaNumeric(title))));

  const { split1, split2 } = getWords(normalize(str1), normalize(str2));

  if (split1.length === 1 && split2.length === 1) {
    return split1[0] === split2[0];
  }
  return analyzeWords(split1, split2);
}
127 |
/**
 * Removes every character that is not an ASCII letter, digit or whitespace.
 * Colons and slashes are turned into spaces first, because they usually
 * separate two words that must stay apart.
 *
 * @param {string} str
 * @returns {string}
 */
function stripNonAlphaNumeric(str) {
  const withSeparatorsSpaced = str.replace(/:|\//g,' ');
  return withSeparatorsSpaced.replace(/[^A-Za-z0-9\s]/g, '');
}
132 |
/**
 * Collapses each run of two or more whitespace characters into one space and
 * trims both ends.
 *
 * @param {string} str
 * @returns {string}
 */
function stripExcessWhitespace(str) {
  const collapsed = str.replace(/\s\s+/g, ' ');
  return collapsed.trim();
}
136 |
/**
 * Splits both strings on single spaces and drops extraneous words so the two
 * word lists line up.
 *
 * At each position (starting with the first word) where the two words score
 * below 0.5, a word is removed from one list when that list's NEXT word is a
 * better fit for the other side's current word and is not an exempt keyword.
 * The first list is tried before the second.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {{split1: string[], split2: string[]}} The aligned word lists.
 */
function getWords(str1, str2) {
  const split1 = str1.split(' ');
  const split2 = str2.split(' ');

  // True when words[i + 1] matches others[i] better than words[i] does.
  const nextFitsBetter = (words, others, i) => {
    const next = words[i + 1];
    return Boolean(next)
      && compareTwoStrings(next, others[i]) > compareTwoStrings(words[i], others[i])
      && !exemptKeywords.includes(next);
  };

  // The bound is taken once, before any word is removed.
  const length = Math.max(split1.length, split2.length);
  for (let i = 0; i < length; i++) {
    if (!split1[i] || !split2[i]) {
      break;
    }
    if (!(compareTwoStrings(split1[i], split2[i]) < 0.5)) {
      continue;
    }

    if (nextFitsBetter(split1, split2, i)) {
      split1.splice(i, 1);
      i--; // re-examine the same position after the removal
    } else if (nextFitsBetter(split2, split1, i)) {
      split2.splice(i, 1);
      i--;
    }
  }
  return { split1, split2 };
}
160 |
/**
 * Compares two word lists position by position.
 *
 * Lists of different length never match. At each position:
 *  - two numeric words must be identical strings;
 *  - a numeric word matches a roman numeral of the same value;
 *  - 'pt' and 'part' are interchangeable;
 *  - otherwise the words must score at least 0.80.
 *
 * @param {string[]} split1
 * @param {string[]} split2
 * @returns {boolean} True when every position matches.
 */
function analyzeWords(split1, split2) {
  if (split1.length !== split2.length) {
    return false;
  }

  const isPartWord = (word) => word === 'pt' || word === 'part';
  const romanEquals = (roman, digits) =>
    isRomanNum(roman) && convertRomanNumToInt(roman) === parseInt(digits);

  for (let i = 0; i < split1.length; i++) {
    const word1 = split1[i];
    const word2 = split2[i];
    const isNumeric1 = !isNaN(word1);
    const isNumeric2 = !isNaN(word2);

    if (isNumeric1 && isNumeric2 && word1 !== word2) {
      return false;
    }

    const romanMatch =
      (isNumeric1 && !isNumeric2 && romanEquals(word2, word1)) ||
      (!isNumeric1 && isNumeric2 && romanEquals(word1, word2));
    if (romanMatch || (isPartWord(word1) && isPartWord(word2))) {
      continue;
    }

    if (compareTwoStrings(word1, word2) < 0.80) {
      return false;
    }
  }
  return true;
}
190 |
/**
 * Tells whether the string contains any of the feature keywords.
 *
 * @param {string} str
 * @returns {boolean}
 */
function containsFeatureTag(str) {
  return Array.from(featKeywords).some((featKeyword) => str.includes(featKeyword));
}
199 |
/**
 * Extracts the featured-artist text from a string.
 *
 * If the string contains '(' or '[', the bracketed groups are searched (round
 * brackets first, then square) and the first group containing a feature keyword
 * is returned with that keyword and all bracket characters removed. Otherwise
 * everything after the first feature keyword found is returned.
 *
 * @param {string} str
 * @returns {string|undefined} The featured artists, or undefined when none is found.
 */
function getFeaturedArtists(str) {
  const hasBrackets = str.includes('(') || str.includes('[');

  if (!hasBrackets) {
    for (const featKeyword of featKeywords) {
      const at = str.indexOf(featKeyword);
      if (at !== -1) {
        return str.substring(at + featKeyword.length, str.length);
      }
    }
    return undefined;
  }

  const bracketGroups = [
    ...(str.match(/\(([^)]+)\)/g) || []),
    ...(str.match(/\[(.*?)\]/g) || []),
  ];
  for (const group of bracketGroups) {
    for (const featKeyword of featKeywords) {
      if (group.includes(featKeyword)) {
        return group.replace(featKeyword, '').replace(/\[|\]|\(|\)/g,'');
      }
    }
  }
  return undefined;
}
226 |
/**
 * Cuts the string at the first feature keyword (in list order: ' feat ',
 * ' ft ', ' with ', ' featuring ') that it contains, dropping the keyword and
 * everything after it. Strings without a keyword are returned unchanged.
 *
 * @param {string} str
 * @returns {string}
 */
function stripFeatureTag(str) {
  const tag = [' feat ', ' ft ', ' with ', ' featuring '].find((featKeyword) => str.includes(featKeyword));
  if (tag === undefined) {
    return str;
  }
  return str.substring(0, str.indexOf(tag));
}
236 |
/**
 * Cuts the string at 'remastered' (or, failing that, 'remaster') and removes
 * four-digit years from what is left. Strings without either keyword are
 * returned unchanged.
 *
 * @param {string} str
 * @returns {string}
 */
function stripRemasteredTag(str) {
  const tag = ['remastered', 'remaster'].find((remasteredKeyword) => str.includes(remasteredKeyword));
  if (tag === undefined) {
    return str;
  }
  return stripYears(str.substring(0, str.indexOf(tag)));
}
247 |
/**
 * Removes every standalone four-digit number together with the whitespace in
 * front of it.
 *
 * @param {string} str
 * @returns {string}
 */
function stripYears(str) {
  const yearWithLeadingSpace = /\s*\b\d{4}\b/g;
  return str.replace(yearWithLeadingSpace, '');
}
251 |
/**
 * Compares the text that follows a feature keyword in each string.
 *
 * For each string the text after a contained feature keyword is taken; when
 * several keywords are contained, the one listed last wins. If both strings
 * yield such text, the two texts are compared word by word; otherwise the
 * pair is accepted.
 *
 * @param {string} str1
 * @param {string} str2
 * @returns {boolean} False only when both tails exist and their words differ.
 */
function analyzeFeatureTagExcess(str1, str2) {
  const tailAfterKeyword = (text) => {
    let tail;
    for (const featKeyword of featKeywords) {
      if (text.includes(featKeyword)) {
        // No break on purpose: a later keyword overrides an earlier one.
        tail = text.substring(text.indexOf(featKeyword) + featKeyword.length, text.length);
      }
    }
    return tail;
  };

  const cutoff1 = tailAfterKeyword(str1);
  const cutoff2 = tailAfterKeyword(str2);
  if (!cutoff1 || !cutoff2) {
    return true;
  }

  const { split1, split2 } = getWords(cutoff1, cutoff2);
  return analyzeWords(split1, split2) ? true : false;
}
272 |
/**
 * Tells whether a value consists solely of characters that are keys of
 * `romanNumVals`.
 *
 * Non-iterable values, `null` and `undefined` are not roman numerals. The
 * original guard only covered `null`, so `undefined` raised a TypeError when
 * `Symbol.iterator` was read from it.
 *
 * @param {*} num - Candidate value, normally a string.
 * @returns {boolean}
 */
function isRomanNum(num) {
  // `== null` matches both null and undefined.
  if (num == null || typeof num[Symbol.iterator] !== 'function') {
    return false;
  }
  for (const char of num) {
    if (!(char in romanNumVals)) {
      return false;
    }
  }
  return true;
}
284 |
/**
 * Converts a roman numeral to its integer value using `romanNumVals`.
 * A symbol whose value is smaller than the following symbol's value is
 * subtracted; every other symbol is added.
 *
 * @param {string} romanNum
 * @returns {number}
 */
function convertRomanNumToInt(romanNum) {
  const symbols = romanNum.split('');
  let total = 0;
  for (let i = 0; i < symbols.length; i++) {
    const value = romanNumVals[symbols[i]];
    const nextValue = romanNumVals[symbols[i + 1]];
    total = total + (value < nextValue ? -value : value);
  }
  return total;
}
289 |
/**
 * Cuts the string at the first album keyword it contains (in `albumKeywords`
 * order) and then removes the first occurrence of 'vol.'.
 *
 * @param {string} str
 * @returns {string}
 */
function stripAlbumTag(str) {
  let base = str;
  for (const albumTag of albumKeywords) {
    const at = str.indexOf(albumTag);
    if (at !== -1) {
      base = str.substring(0, at);
      break;
    }
  }
  return base.replace('vol.', '');
}
300 |
/**
 * Checks whether two artist strings share an artist when at least one of them
 * is a list separated by '&' or ','.
 *
 *  - both are lists: true when the lists have a name in common;
 *  - only one is a list: true when that list contains the other string verbatim;
 *  - neither is a list: false.
 *
 * Empty names produced by a leading, trailing or doubled separator are
 * discarded. Previously they were kept, so two unrelated lists such as
 * 'foo &' and 'bar,' matched each other on the empty string.
 *
 * @param {string} artist1
 * @param {string} artist2
 * @returns {boolean}
 */
function analyzeArtistList(artist1, artist2) {
  const isList = (artist) => artist.includes('&') || artist.includes(',');
  const splitArtists = (artist) =>
    artist
      .split(/,|&/g)
      .map((name) => stripExcessWhitespace(name))
      .filter((name) => name !== '');

  const isArtist1List = isList(artist1);
  const isArtist2List = isList(artist2);

  if (isArtist1List && isArtist2List) {
    const artists2 = new Set(splitArtists(artist2));
    return splitArtists(artist1).some((artist) => artists2.has(artist));
  }
  if (isArtist1List) {
    return splitArtists(artist1).includes(artist2);
  }
  if (isArtist2List) {
    return splitArtists(artist2).includes(artist1);
  }
  return false;
}
324 |
--------------------------------------------------------------------------------
/tests/trackRules.test.js:
--------------------------------------------------------------------------------
1 | const rules = require('../client/src/rules');
2 |
// Each row is [test title, first track, second track]; every pair must be
// reported as a duplicate when the rules are enabled.
describe('Check if the isDuplicateTrack method successfully detects duplicates', () => {
  const duplicateCases = [
    ['Check if a track with an excess feature tag is a duplicate', 'See You Again', 'See You Again (feat. Kali Uchis)'],
    ['Check if a track with an extraneous tag and an excess feature tag is a duplicate', 'Int\'l Players Anthem (I Choose You)', 'Int\'l Players Anthem (I Choose You) (feat. Outkast)'],
    ['Check if a track with excess spaces is a duplicate ', 'Vibin\' Out with ((( O )))', 'Vibin\' out with (((O)))'],
    ['Check if a track with an altered list of featured artists is a duplicate', 'Izayah (feat. Key!, Maxo Kream & Denzel Curry)', 'Izayah (feat. Key!, Maxo Kream, Denzel Curry & Kenny Beats)'],
    ['Check if a track with altered parentheses is a duplicate', 'Never Bend (Remix) (feat. Lil Uzi Vert)', 'Never Bend (Remix) [feat. Lil Uzi Vert]'],
    ['Check if a track with excess non alphanumeric characters is a duplicate', 'V. 3005 (beach picnic version)', 'V. 3005 - Beach Picnic Version'],
    ['Check if a track with excess abbreviation is a duplicate', 'Drunk In L.A.', 'Drunk in LA'],
    ['Check if interlude with different nonalphanumeric characters is a duplicate', 'Brand New Tyga (Interlude)', 'Brand New Tyga - Interlude'],
    ['Check if same track with different apostrophe is a duplicate', 'You\'re Either On Something', 'You\’re Either On Something'],
    ['Check if same track with different quotation marks is a duplicate', '1985 (Intro to \"The Fall Off\")', '1985 - Intro to “The Fall Off”'],
    ['Check if track using feat instead of with is a duplicate', 'Only 1 (Interlude) (with Travis Scott)', 'Only 1 (Interlude) [feat. Travis Scott]'],
    ['Check if track using g-dropping is a duplicate', 'Livin\' Underwater (Is Somethin\' Wild)', 'Livin’ Underwater (Is Something Wild)'],
    ['FINISH EM ZEL | F1N1ZH EM ZEL', 'SIRENS l Z1RENZ (feat. J.I.D)', 'SIRENS | Z1RENZ [FEAT. J.I.D | J.1.D]'],
    ['Check if track with extra text is a duplicate', 'Bedtime Stories (Feat. The Weeknd)', 'Bedtime Stories (feat. The Weeknd) - From SR3MM'],
    ['Check if feature tags using different nonalphanumeric characters are duplicates', 'Flying Overseas (feat. Devonte Hynes And Solange Knowles)', 'Flying Overseas - feat. Devonte Hynes And Solange Knowles'],
    ['Check if remix with excess feature tag is a duplicate', 'Drunk In Love Remix', 'Drunk In Love Remix (feat. Jay Z & Kanye West)'],
    ['Check if extended version with excess feature tag is a duplicate', 'Blessings (Extended Version) [feat. Drake & Kanye West]', 'Blessings - Extended Version'],
    ['Check if repeated feature tag is a duplicate', 'Palmolive (feat. Pusha T & Killer Mike)', 'Palmolive feat. Pusha T. & Killer Mike (feat. Pusha T & Killer Mike)'],
    ['Check if tracks using different spelling of \'part\' are duplicates', 'Girls, Girls, Girls (Part 2)', 'Girls, Girls, Girls, Pt. 2'],
    ['Check if track using roman numerals is a duplicate', 'Girls, Girls, Girls, Pt. 2', 'Girls, Girls, Girls, pt. II'],
    ['Check if the same mix is a duplicate', 'Hey Ya! (Radio Mix/Club Mix)', 'Hey Ya! - Radio Mix / Club Mix'],
    ['Check if an ampersand instead of \'and\' is a duplicate', 'Or Nah (feat. The Weeknd, Wiz Khalifa & DJ Mustard) - Remix', 'Or Nah (feat. The Weeknd, Wiz Khalifa and DJ Mustard) - Remix'],
    ['Check if featured artists in different order is a duplicate', '100 Bands (feat. Quavo, 21 Savage, Meek Mill & YG)', '100 Bands (feat. Quavo, 21 Savage, YG & Meek Mill)'],
    ['Check if abbreviation not using punctuation is a duplicate', 'Operation Lifesaver a.k.a Mint Test', 'Operation Lifesaver aka Mint Test'],
    ['Check if special character using different spacing is a duplicate', 'Music: Response', 'Music:Response'],
    ['Check if feature using \'featuring\' instead of \'feat\' is a duplicate', 'Jailbreak the Tesla (feat. Aminé)', 'Jailbreak the Tesla featuring Aminé'],
    ['Check if track with excess feature and a title with different casing is a duplicate', 'PrimeTime', 'Primetime (feat. Miguel)'],
    ['Check if a remastered song is a duplicate', 'Smooth Criminal', 'Smooth Criminal - 2012 Remaster'],
  ];

  for (const [title, track1, track2] of duplicateCases) {
    test(title, () => {
      expect(rules.isDuplicateTrack(track1, track2, true)).toBe(true);
    });
  }
});
89 |
// Each row is [test title, first track, second track]; no pair may be
// reported as a duplicate when the rules are enabled.
describe('Check if the isDuplicateTrack method successfully detects non duplicates', () => {
  const nonDuplicateCases = [
    ['Check if tracks with numeric/non-numeric numbering with mismatching numbers are not duplicates', 'Minus 3', 'Minus One'],
    ['Check if tracks with numeric numbering with mismatching numbers are not duplicates', 'The Birds Part 1', 'The Birds Part 2'],
    ['Check if tracks with a single different word are not duplicates', 'Starfruit LA', 'Starfruit NYC'],
    ['Check if tracks with differing roman numerals are not duplicates', 'Things That Are Bad for Me (Part I)', 'Things That Are Bad for Me (Part II)'],
    ['Check if tracks with the same remastered tag are not duplicates', 'Hotel California - Eagles 2013 Remaster', 'Peaceful Easy Feeling - Eagles 2013 Remaster'],
    ['Check if an original mix and an edit are not duplicates', 'Starry Night - Edit', 'Starry Night - Original Mix'],
    ['Check if tracks with a single different word in parantheses is not a duplicate', 'Bermondsey Bosom (Left)', 'Bermondsey Bosom (Right)'],
    ['Check if the single version of a track is not a duplicate', 'Somebody\'s Watching Me', 'Somebody\'s Watching Me - Single Version'],
    ['Check if the same track remixed by different artists is not a duplicate', 'Positive Contact - Bonus Track - Charlie Clouser Remix', 'Positive Contact - Bonus Track - Mario C Remix'],
    ['Check if the same track remixed by different artists using \'with\' is not a duplicate', 'OMG (with Carly Rae Jepsen) - Alphalove Remix', 'OMG (with Carly Rae Jepsen) - Anki Remix'],
    ['Check if remix nested in feature tag is not a duplicate', 'Genius (with Lil Wayne, Sia, Diplo & Labrinth - Lil Wayne Remix)', 'Genius (with Sia, Diplo & Labrinth)'],
    ['Check if remix following feature tag is not a duplicate', 'In Your Eyes (Feat. Charlotte Day Wilson)', 'In Your Eyes (feat. Charlotte Day Wilson) - Nosaj Thing Remix)'],
    ['Check if a cappella version is not a duplicate', 'Call Out My Name', 'Call Out My Name - A Cappella'],
    ['Check if interlude with the same name is not a duplicate', 'All of the Lights', 'All of the Lights (Interlude)'],
    ['Check if instrumental version is not a duplicate', 'In The City', 'In The City (Instrumental)'],
    ['Check if reprise is not a duplicate', 'Liability', 'Liability (Reprise)'],
    ['Check if tracks with the same name but different featured artists are not duplicates', 'waves (feat. Kacey Musgraves) - Remix', 'waves (feat. Travis Scott) - Remix'],
    ['Check if interludes with similar names are not duplicates', 'For Free? - Interlude', 'For Sale? (interlude)'],
    ['Check if tracks with a name consisting of a single similar word are not duplicates', 'Insecure', 'Insecurity'],
    ['Check if different tracks with the same feature are not duplicates', 'Atlantique Sud (feat. Mai Lan)', 'Bibi the Dog (feat. Mai Lan)'],
    ['Check if a different mix of the same track is not a duplicate', 'So Heavy I Fell Through the Earth - Algorithm Mix', 'So Heavy I Fell Through the Earth - Art Mix'],
    ['Check if a continued song is not a duplicate', 'Everything Now', 'Everything Now (continued)'],
    ['Check if another part of a song using roman numerals is not a duplicate', 'The Face Part I', 'The Face Part II'],
    ['Check if an acoustic song is not a duplicate', 'The Shade', 'The Shade - Acoustic'],
    ['Check if the album version of a song is not a duplicate', 'Tailwhip', 'Tailwhip (Album V)'],
    ['Check if the similar interludes are not a duplicates', 'Datwhip (interlude)', 'Dntstop (interlude)'],
    ['Check if different remix artists are not stripped and marked as a duplicate', 'Sylvia Says (Breakbot Remix)', 'Sylvia Says (Tensnake Remix)'],
  ];

  for (const [title, track1, track2] of nonDuplicateCases) {
    test(title, () => {
      expect(rules.isDuplicateTrack(track1, track2, true)).toBe(false);
    });
  }
});
173 |
--------------------------------------------------------------------------------
/client/src/MainPage.js:
--------------------------------------------------------------------------------
1 | import React, { Component } from 'react';
2 | import MenuItem from '@material-ui/core/MenuItem';
3 | import Select from '@material-ui/core/Select';
4 | import TextField from '@material-ui/core/TextField';
5 | import Button from '@material-ui/core/Button';
6 | import Typography from '@material-ui/core/Typography';
7 | import './MainPage.css';
8 | import { withStyles } from "@material-ui/core/styles";
9 | import DuplicateTable from './DuplicateTable';
10 | import {createMuiTheme, MuiThemeProvider} from '@material-ui/core/styles';
11 | import LinearProgress from '@material-ui/core/LinearProgress';
12 | import { green, blue } from '@material-ui/core/colors';
13 | import Switch from '@material-ui/core/Switch';
14 | import DuplicateArtistTable from './DuplicateArtistTable';
15 | import { isDuplicateTrack, isDuplicateAlbum, isDuplicateArtist } from './rules';
16 | import DialogTitle from '@material-ui/core/DialogTitle';
17 | import Dialog from '@material-ui/core/Dialog';
18 | import DialogContent from '@material-ui/core/DialogContent';
19 | import DialogContentText from '@material-ui/core/DialogContentText';
20 | import { Paper, ButtonGroup, List, ListItem, Avatar, ListItemAvatar, ListItemText } from '@material-ui/core';
21 | import { LinkIcon, GitHubIcon, LastFmIcon, HelpIcon } from './Icons';
22 | import { registerPageView, registerDownload, registerRequest, registerLastfmError, registerCount } from './analytics';
23 |
/**
 * Style sheet factory passed to `withStyles`.
 * The `theme` argument is accepted but not read.
 */
const styles = (theme) => {
  // Page container: centered column layout.
  const root = {
    textAlign: 'center',
    display: 'flex',
    flexDirection: 'column',
    alignItems: 'center',
  };
  // Spacing applied to individual page elements.
  const mainPageElem = {
    margin: '10px',
  };
  return { root, mainPageElem };
};
35 |
// Dark Material-UI theme with a green primary and blue secondary palette;
// spell checking is switched off for every MuiInput.
const themePalette = { type: 'dark', primary: green, secondary: blue };
const themeProps = { MuiInput: { inputProps: { spellCheck: 'false' } } };
const theme = createMuiTheme({ palette: themePalette, props: themeProps });
46 |
/**
 * Comparator that orders two strings by a normalized key: colons and slashes
 * become spaces, all other characters except ASCII letters, digits and
 * whitespace are dropped, and the result is lower-cased.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} 1, -1 or 0.
 */
const sortResults = (a, b) => {
  const toSortKey = (text) => text.replace(/:|\//g,' ').replace(/[^A-Za-z0-9\s]/g, '').toLowerCase();
  const keyA = toSortKey(a);
  const keyB = toSortKey(b);
  if (keyA > keyB) {
    return 1;
  }
  return keyA < keyB ? -1 : 0;
};
57 |
/**
 * Sums the lengths of all lists stored under the own enumerable keys of `matched`.
 *
 * @param {Object<string, Array>} matched
 * @returns {number}
 */
const getNumResults = (matched) => {
  let total = 0;
  for (const key of Object.keys(matched)) {
    total += matched[key].length;
  }
  return total;
};
59 |
60 | class MainPage extends Component {
61 | constructor(props) {
62 | super(props);
63 | this.state = {reqType: 'tracks', user: '', useRules: true, isLoading: false, loadPercent: 0, results: {}, error: '', isHelpOpen: false, isAboutOpen: false};
64 | this.handleInputChange = this.handleInputChange.bind(this);
65 | this.makeRequest = this.makeRequest.bind(this);
66 | this.getPage = this.getPage.bind(this);
67 | this.partitionResults = this.partitionResults.bind(this);
68 | this.HelpDialog = this.HelpDialog.bind(this);
69 | this.AboutDialog = this.AboutDialog.bind(this);
70 | this.downloadResults = this.downloadResults.bind(this);
71 | }
72 |
73 | componentDidMount() {
74 | registerPageView();
75 | }
76 |
77 | makeRequest(event) {
78 | registerRequest(this.state.reqType, this.state.useRules);
79 | if (this.state.reqType && !this.state.isLoading) {
80 | this.setState({isLoading: true, loadPercent: 0}, () => {
81 | fetch(`/num${this.state.reqType}?user=${this.state.user}`).then(response => {
82 | if (response.status === 200) {
83 | response.text().then(t => {
84 | let percentStep = Math.floor(100 / (Math.ceil(parseInt(t) / 1000)));
85 | this.getPage(parseInt(t), 1, [], percentStep, 0);
86 | })
87 | } else {
88 | response.json().then(res => {
89 | this.setState({ results: '', isLoading: false, error: res.error });
90 | });
91 | }
92 | });
93 | });
94 | }
95 | event.preventDefault();
96 | }
97 |
98 | getPage(total, pageNum, results, percentStep, numTries) {
99 | fetch(`/${this.state.reqType}?user=${this.state.user}&pageNum=${pageNum}`).then(response => {
100 | if (response.status === 200) {
101 | response.json().then(res => {
102 | results.push(...res);
103 | if (total - 1000 > 0) {
104 | this.setState({ loadPercent: this.state.loadPercent + percentStep });
105 | this.getPage(total - 1000, pageNum + 1, results, percentStep);
106 | } else {
107 | if (this.state.reqType === 'artists') {
108 | this.setState({ loadPercent: this.state.loadPercent + percentStep });
109 | this.getDuplicateArtists(results, percentStep);
110 | } else {
111 | this.setState({ loadPercent: this.state.loadPercent + percentStep });
112 | this.partitionResults(results, percentStep);
113 | }
114 | }
115 | });
116 | } else {
117 | response.json().then(res => {
118 | if (numTries >= 5) {
119 | registerLastfmError(res.error, this.state.reqType);
120 | this.setState({ results: '', isLoading: false, error: res.error });
121 | } else {
122 | this.getPage(total, pageNum, results, percentStep, numTries + 1);
123 | }
124 | });
125 | }
126 | });
127 | }
128 |
129 | partitionResults(results, percentStep) {
130 | let partitioned = {};
131 | for (let i = 0; i < results.length; i++) {
132 | if (!partitioned[results[i].artist]) {
133 | partitioned[results[i].artist] = [results[i].name]
134 | } else {
135 | partitioned[results[i].artist].push(results[i].name);
136 | }
137 | }
138 | if (this.state.reqType === 'tracks') {
139 | this.setState({ loadPercent: this.state.loadPercent + percentStep });
140 | this.getDuplicateTracks(partitioned);
141 | } else {
142 | this.setState({ loadPercent: this.state.loadPercent + percentStep });
143 | this.getDuplicateAlbums(partitioned);
144 | }
145 | }
146 |
147 | getDuplicateTracks(partitioned) {
148 | let matched = {};
149 | for (let artist of Object.keys(partitioned)) {
150 | partitioned[artist].sort((a, b) => sortResults(a, b));
151 | for (let i = 0; i < partitioned[artist].length - 1; i++) {
152 | if (isDuplicateTrack(partitioned[artist][i], partitioned[artist][i + 1], this.state.useRules)) {
153 | // TODO: If a match is found, compare against the next track to find multiple duplicates
154 | if (!matched[artist]) {
155 | matched[artist] = [{result1: partitioned[artist][i], result2: partitioned[artist][i + 1]}];
156 | } else {
157 | matched[artist].push({result1: partitioned[artist][i], result2: partitioned[artist][i + 1]});
158 | }
159 | }
160 | }
161 | }
162 | registerCount(getNumResults(matched), this.state.reqType);
163 | this.setState({ results: this.state.reqType === 'artists' ? {matches: matched} : matched, isLoading: false, error: '', loadPercent: 100 });
164 | }
165 |
166 | getDuplicateAlbums(partitioned) {
167 | let matched = {};
168 | for (let artist of Object.keys(partitioned)) {
169 | partitioned[artist].sort((a, b) => sortResults(a, b));
170 | for (let i = 0; i < partitioned[artist].length - 1; i++) {
171 | if (isDuplicateAlbum(partitioned[artist][i], partitioned[artist][i + 1], this.state.useRules)) {
172 | // TODO: If a match is found, compare against the next track to find multiple duplicates
173 | if (!matched[artist]) {
174 | matched[artist] = [{result1: partitioned[artist][i], result2: partitioned[artist][i + 1]}];
175 | } else {
176 | matched[artist].push({result1: partitioned[artist][i], result2: partitioned[artist][i + 1]});
177 | }
178 | }
179 | }
180 | }
181 | registerCount(getNumResults(matched), this.state.reqType);
182 | this.setState({ results: this.state.reqType === 'artists' ? {matches: matched} : matched, isLoading: false, error: '', loadPercent: 100 });
183 | }
184 |
185 | getDuplicateArtists(results) {
186 | let matched = [];
187 | results.sort((a, b) => sortResults(a, b));
188 | for (let i = 0; i < results.length - 1; i++) {
189 | if (isDuplicateArtist(results[i], results[i + 1], this.state.useRules)) {
190 | matched.push({result1: results[i], result2: results[i + 1]});
191 | }
192 | }
193 | registerCount(matched.length, this.state.reqType);
194 | this.setState({ results: this.state.reqType === 'artists' ? {matches: matched} : matched, isLoading: false, error: '', loadPercent: 100 });
195 | }
196 |
197 | handleInputChange(event) {
198 | if (event.target.name === 'reqType') {
199 | this.setState({ results: {}, loadPercent: 0 });
200 | }
201 | this.setState({ [event.target.name] : event.target.name === 'useRules' ? event.target.checked : event.target.value});
202 | }
203 |
// Presumably renders the help dialog (bound in the constructor for use as a callback).
// NOTE(review): the returned markup (embedded lines 206-213) is missing from this
// listing; restore it from the repository before changing this method.
204 | HelpDialog() {
205 | return (
206 |
214 | );
215 | }
216 |
// Presumably renders the about dialog (bound in the constructor for use as a callback).
// NOTE(review): the returned markup (embedded lines 219-267) is missing from this
// listing; restore it from the repository before changing this method.
217 | AboutDialog() {
218 | return (
219 |
268 | );
269 | }
270 |
271 | downloadResults(format) {
272 | registerDownload(format);
273 | const element = document.createElement("a");
274 | let file;
275 | if (format === 'json') {
276 | file = new Blob([JSON.stringify(this.state.results)], {type: 'application/json'});
277 | } else {
278 | let csvContent;
279 | if (this.state.reqType === 'tracks' || this.state.reqType === 'albums') {
280 | let header = this.state.reqType === 'tracks' ? 'track' : 'album'
281 | csvContent = Object.keys(this.state.results).reduce((acc1, artist) => (
282 | acc1 + this.state.results[artist].reduce((acc2, result) => acc2 + `"${artist}","${result.result1}","${result.result2}"\n`, '')
283 | ), `artist,${header}1,${header}2\n`);
284 | } else {
285 | csvContent = this.state.results.matches.reduce((acc, result) => acc + `"${result.result1}","${result.result2}"\n`, 'artist1,artist2\n');
286 | }
287 | file = new Blob([csvContent], {type: 'text/csv;charset=utf-8;'});
288 | }
289 | element.href = URL.createObjectURL(file);
290 | element.download = `${this.state.user}-${this.state.reqType}.${format}`;
291 | document.body.appendChild(element);
292 | element.click();
293 | }
294 |
295 | render() {
296 | const { classes } = this.props;
297 | let resultsView;
298 | if (this.state.isLoading) {
299 | resultsView =
300 | } else {
301 | if (this.state.results) {
302 | if (this.state.reqType === 'albums' || this.state.reqType === 'tracks') {
303 | resultsView = ;
304 | } else if (this.state.results.matches) {
305 | resultsView =
306 | }
307 | } else {
308 | resultsView = {this.state.error}
309 | }
310 | }
311 | return (
312 |
313 |