85 |
86 | Read the Docs
87 | v: ${config.versions.current.slug}
88 |
89 |
90 |
91 |
92 | ${renderLanguages(config)}
93 | ${renderVersions(config)}
94 | ${renderDownloads(config)}
95 |
96 | - On Read the Docs
97 | -
98 | Project Home
99 |
100 | -
101 | Builds
102 |
103 | -
104 | Downloads
105 |
106 |
107 |
108 | - Search
109 | -
110 |
119 |
120 |
121 |
122 |
123 | Hosted by Read the Docs
124 |
125 |
126 |
127 | `;
128 |
129 | // Inject the generated flyout into the body HTML element.
130 | document.body.insertAdjacentHTML("beforeend", flyout);
131 |
132 | // Trigger the Read the Docs Addons Search modal when clicking on the "Search docs" input from inside the flyout.
133 | document
134 | .querySelector("#flyout-search-form")
135 | .addEventListener("focusin", () => {
136 | const event = new CustomEvent("readthedocs-search-show");
137 | document.dispatchEvent(event);
138 | });
139 | })
140 | }
141 |
142 | if (themeLanguageSelector || themeVersionSelector) {
143 | function onSelectorSwitch(event) {
144 | const option = event.target.selectedIndex;
145 | const item = event.target.options[option];
146 | window.location.href = item.dataset.url;
147 | }
148 |
149 | document.addEventListener("readthedocs-addons-data-ready", function (event) {
150 | const config = event.detail.data();
151 |
152 | const versionSwitch = document.querySelector(
153 | "div.switch-menus > div.version-switch",
154 | );
155 | if (themeVersionSelector) {
156 | let versions = config.versions.active;
157 | if (config.versions.current.hidden || config.versions.current.type === "external") {
158 | versions.unshift(config.versions.current);
159 | }
160 | const versionSelect = `
161 |
174 | `;
175 |
176 | versionSwitch.innerHTML = versionSelect;
177 | versionSwitch.firstElementChild.addEventListener("change", onSelectorSwitch);
178 | }
179 |
180 | const languageSwitch = document.querySelector(
181 | "div.switch-menus > div.language-switch",
182 | );
183 |
184 | if (themeLanguageSelector) {
185 | if (config.projects.translations.length) {
186 | // Add the current language to the options on the selector
187 | let languages = config.projects.translations.concat(
188 | config.projects.current,
189 | );
190 | languages = languages.sort((a, b) =>
191 | a.language.name.localeCompare(b.language.name),
192 | );
193 |
194 | const languageSelect = `
195 |
208 | `;
209 |
210 | languageSwitch.innerHTML = languageSelect;
211 | languageSwitch.firstElementChild.addEventListener("change", onSelectorSwitch);
212 | }
213 | else {
214 | languageSwitch.remove();
215 | }
216 | }
217 | });
218 | }
219 |
220 | document.addEventListener("readthedocs-addons-data-ready", function (event) {
221 | // Trigger the Read the Docs Addons Search modal when clicking on "Search docs" input from the topnav.
222 | document
223 | .querySelector("[role='search'] input")
224 | .addEventListener("focusin", () => {
225 | const event = new CustomEvent("readthedocs-search-show");
226 | document.dispatchEvent(event);
227 | });
228 | });
--------------------------------------------------------------------------------
/docs/pages/html/_static/language_data.js:
--------------------------------------------------------------------------------
1 | /*
2 | * This script contains the language-specific data used by searchtools.js,
3 | * namely the list of stopwords, stemmer, scorer and splitter.
4 | */
5 |
6 | var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"];
7 |
8 |
9 | /* Non-minified version is copied as a separate JS file, if available */
10 |
11 | /**
12 | * Porter Stemmer
13 | */
14 | var Stemmer = function() {
15 |
16 | var step2list = {
17 | ational: 'ate',
18 | tional: 'tion',
19 | enci: 'ence',
20 | anci: 'ance',
21 | izer: 'ize',
22 | bli: 'ble',
23 | alli: 'al',
24 | entli: 'ent',
25 | eli: 'e',
26 | ousli: 'ous',
27 | ization: 'ize',
28 | ation: 'ate',
29 | ator: 'ate',
30 | alism: 'al',
31 | iveness: 'ive',
32 | fulness: 'ful',
33 | ousness: 'ous',
34 | aliti: 'al',
35 | iviti: 'ive',
36 | biliti: 'ble',
37 | logi: 'log'
38 | };
39 |
40 | var step3list = {
41 | icate: 'ic',
42 | ative: '',
43 | alize: 'al',
44 | iciti: 'ic',
45 | ical: 'ic',
46 | ful: '',
47 | ness: ''
48 | };
49 |
50 | var c = "[^aeiou]"; // consonant
51 | var v = "[aeiouy]"; // vowel
52 | var C = c + "[^aeiouy]*"; // consonant sequence
53 | var V = v + "[aeiou]*"; // vowel sequence
54 |
55 | var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0
56 | var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1
57 | var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1
58 | var s_v = "^(" + C + ")?" + v; // vowel in stem
59 |
60 | this.stemWord = function (w) {
61 | var stem;
62 | var suffix;
63 | var firstch;
64 | var origword = w;
65 |
66 | if (w.length < 3)
67 | return w;
68 |
69 | var re;
70 | var re2;
71 | var re3;
72 | var re4;
73 |
74 | firstch = w.substr(0,1);
75 | if (firstch == "y")
76 | w = firstch.toUpperCase() + w.substr(1);
77 |
78 | // Step 1a
79 | re = /^(.+?)(ss|i)es$/;
80 | re2 = /^(.+?)([^s])s$/;
81 |
82 | if (re.test(w))
83 | w = w.replace(re,"$1$2");
84 | else if (re2.test(w))
85 | w = w.replace(re2,"$1$2");
86 |
87 | // Step 1b
88 | re = /^(.+?)eed$/;
89 | re2 = /^(.+?)(ed|ing)$/;
90 | if (re.test(w)) {
91 | var fp = re.exec(w);
92 | re = new RegExp(mgr0);
93 | if (re.test(fp[1])) {
94 | re = /.$/;
95 | w = w.replace(re,"");
96 | }
97 | }
98 | else if (re2.test(w)) {
99 | var fp = re2.exec(w);
100 | stem = fp[1];
101 | re2 = new RegExp(s_v);
102 | if (re2.test(stem)) {
103 | w = stem;
104 | re2 = /(at|bl|iz)$/;
105 | re3 = new RegExp("([^aeiouylsz])\\1$");
106 | re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
107 | if (re2.test(w))
108 | w = w + "e";
109 | else if (re3.test(w)) {
110 | re = /.$/;
111 | w = w.replace(re,"");
112 | }
113 | else if (re4.test(w))
114 | w = w + "e";
115 | }
116 | }
117 |
118 | // Step 1c
119 | re = /^(.+?)y$/;
120 | if (re.test(w)) {
121 | var fp = re.exec(w);
122 | stem = fp[1];
123 | re = new RegExp(s_v);
124 | if (re.test(stem))
125 | w = stem + "i";
126 | }
127 |
128 | // Step 2
129 | re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
130 | if (re.test(w)) {
131 | var fp = re.exec(w);
132 | stem = fp[1];
133 | suffix = fp[2];
134 | re = new RegExp(mgr0);
135 | if (re.test(stem))
136 | w = stem + step2list[suffix];
137 | }
138 |
139 | // Step 3
140 | re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
141 | if (re.test(w)) {
142 | var fp = re.exec(w);
143 | stem = fp[1];
144 | suffix = fp[2];
145 | re = new RegExp(mgr0);
146 | if (re.test(stem))
147 | w = stem + step3list[suffix];
148 | }
149 |
150 | // Step 4
151 | re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
152 | re2 = /^(.+?)(s|t)(ion)$/;
153 | if (re.test(w)) {
154 | var fp = re.exec(w);
155 | stem = fp[1];
156 | re = new RegExp(mgr1);
157 | if (re.test(stem))
158 | w = stem;
159 | }
160 | else if (re2.test(w)) {
161 | var fp = re2.exec(w);
162 | stem = fp[1] + fp[2];
163 | re2 = new RegExp(mgr1);
164 | if (re2.test(stem))
165 | w = stem;
166 | }
167 |
168 | // Step 5
169 | re = /^(.+?)e$/;
170 | if (re.test(w)) {
171 | var fp = re.exec(w);
172 | stem = fp[1];
173 | re = new RegExp(mgr1);
174 | re2 = new RegExp(meq1);
175 | re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
176 | if (re.test(stem) || (re2.test(stem) && !(re3.test(stem))))
177 | w = stem;
178 | }
179 | re = /ll$/;
180 | re2 = new RegExp(mgr1);
181 | if (re.test(w) && re2.test(w)) {
182 | re = /.$/;
183 | w = w.replace(re,"");
184 | }
185 |
186 | // and turn initial Y back to y
187 | if (firstch == "y")
188 | w = firstch.toLowerCase() + w.substr(1);
189 | return w;
190 | }
191 | }
192 |
193 |
--------------------------------------------------------------------------------
/docs/pages/html/_static/minus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erdogant/undouble/735f548fbd7e9cd5ac4c9b891e5f685753727e6b/docs/pages/html/_static/minus.png
--------------------------------------------------------------------------------
/docs/pages/html/_static/plus.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erdogant/undouble/735f548fbd7e9cd5ac4c9b891e5f685753727e6b/docs/pages/html/_static/plus.png
--------------------------------------------------------------------------------
/docs/pages/html/_static/pygments.css:
--------------------------------------------------------------------------------
1 | pre { line-height: 125%; }
2 | td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
3 | span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
4 | td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
5 | span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
6 | .highlight .hll { background-color: #ffffcc }
7 | .highlight { background: #eeffcc; }
8 | .highlight .c { color: #408090; font-style: italic } /* Comment */
9 | .highlight .err { border: 1px solid #FF0000 } /* Error */
10 | .highlight .k { color: #007020; font-weight: bold } /* Keyword */
11 | .highlight .o { color: #666666 } /* Operator */
12 | .highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */
13 | .highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */
14 | .highlight .cp { color: #007020 } /* Comment.Preproc */
15 | .highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */
16 | .highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */
17 | .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */
18 | .highlight .gd { color: #A00000 } /* Generic.Deleted */
19 | .highlight .ge { font-style: italic } /* Generic.Emph */
20 | .highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
21 | .highlight .gr { color: #FF0000 } /* Generic.Error */
22 | .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
23 | .highlight .gi { color: #00A000 } /* Generic.Inserted */
24 | .highlight .go { color: #333333 } /* Generic.Output */
25 | .highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
26 | .highlight .gs { font-weight: bold } /* Generic.Strong */
27 | .highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
28 | .highlight .gt { color: #0044DD } /* Generic.Traceback */
29 | .highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
30 | .highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
31 | .highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
32 | .highlight .kp { color: #007020 } /* Keyword.Pseudo */
33 | .highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
34 | .highlight .kt { color: #902000 } /* Keyword.Type */
35 | .highlight .m { color: #208050 } /* Literal.Number */
36 | .highlight .s { color: #4070a0 } /* Literal.String */
37 | .highlight .na { color: #4070a0 } /* Name.Attribute */
38 | .highlight .nb { color: #007020 } /* Name.Builtin */
39 | .highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
40 | .highlight .no { color: #60add5 } /* Name.Constant */
41 | .highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */
42 | .highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */
43 | .highlight .ne { color: #007020 } /* Name.Exception */
44 | .highlight .nf { color: #06287e } /* Name.Function */
45 | .highlight .nl { color: #002070; font-weight: bold } /* Name.Label */
46 | .highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
47 | .highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */
48 | .highlight .nv { color: #bb60d5 } /* Name.Variable */
49 | .highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */
50 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */
51 | .highlight .mb { color: #208050 } /* Literal.Number.Bin */
52 | .highlight .mf { color: #208050 } /* Literal.Number.Float */
53 | .highlight .mh { color: #208050 } /* Literal.Number.Hex */
54 | .highlight .mi { color: #208050 } /* Literal.Number.Integer */
55 | .highlight .mo { color: #208050 } /* Literal.Number.Oct */
56 | .highlight .sa { color: #4070a0 } /* Literal.String.Affix */
57 | .highlight .sb { color: #4070a0 } /* Literal.String.Backtick */
58 | .highlight .sc { color: #4070a0 } /* Literal.String.Char */
59 | .highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */
60 | .highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
61 | .highlight .s2 { color: #4070a0 } /* Literal.String.Double */
62 | .highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
63 | .highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */
64 | .highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
65 | .highlight .sx { color: #c65d09 } /* Literal.String.Other */
66 | .highlight .sr { color: #235388 } /* Literal.String.Regex */
67 | .highlight .s1 { color: #4070a0 } /* Literal.String.Single */
68 | .highlight .ss { color: #517918 } /* Literal.String.Symbol */
69 | .highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */
70 | .highlight .fm { color: #06287e } /* Name.Function.Magic */
71 | .highlight .vc { color: #bb60d5 } /* Name.Variable.Class */
72 | .highlight .vg { color: #bb60d5 } /* Name.Variable.Global */
73 | .highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */
74 | .highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */
75 | .highlight .il { color: #208050 } /* Literal.Number.Integer.Long */
--------------------------------------------------------------------------------
/docs/pages/html/_static/sphinx_highlight.js:
--------------------------------------------------------------------------------
1 | /* Highlighting utilities for Sphinx HTML documentation. */
2 | "use strict";
3 |
4 | const SPHINX_HIGHLIGHT_ENABLED = true
5 |
6 | /**
7 | * highlight a given string on a node by wrapping it in
8 | * span elements with the given class name.
9 | */
10 | const _highlight = (node, addItems, text, className) => {
11 | if (node.nodeType === Node.TEXT_NODE) {
12 | const val = node.nodeValue;
13 | const parent = node.parentNode;
14 | const pos = val.toLowerCase().indexOf(text);
15 | if (
16 | pos >= 0 &&
17 | !parent.classList.contains(className) &&
18 | !parent.classList.contains("nohighlight")
19 | ) {
20 | let span;
21 |
22 | const closestNode = parent.closest("body, svg, foreignObject");
23 | const isInSVG = closestNode && closestNode.matches("svg");
24 | if (isInSVG) {
25 | span = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
26 | } else {
27 | span = document.createElement("span");
28 | span.classList.add(className);
29 | }
30 |
31 | span.appendChild(document.createTextNode(val.substr(pos, text.length)));
32 | const rest = document.createTextNode(val.substr(pos + text.length));
33 | parent.insertBefore(
34 | span,
35 | parent.insertBefore(
36 | rest,
37 | node.nextSibling
38 | )
39 | );
40 | node.nodeValue = val.substr(0, pos);
41 | /* There may be more occurrences of search term in this node. So call this
42 | * function recursively on the remaining fragment.
43 | */
44 | _highlight(rest, addItems, text, className);
45 |
46 | if (isInSVG) {
47 | const rect = document.createElementNS(
48 | "http://www.w3.org/2000/svg",
49 | "rect"
50 | );
51 | const bbox = parent.getBBox();
52 | rect.x.baseVal.value = bbox.x;
53 | rect.y.baseVal.value = bbox.y;
54 | rect.width.baseVal.value = bbox.width;
55 | rect.height.baseVal.value = bbox.height;
56 | rect.setAttribute("class", className);
57 | addItems.push({ parent: parent, target: rect });
58 | }
59 | }
60 | } else if (node.matches && !node.matches("button, select, textarea")) {
61 | node.childNodes.forEach((el) => _highlight(el, addItems, text, className));
62 | }
63 | };
64 | const _highlightText = (thisNode, text, className) => {
65 | let addItems = [];
66 | _highlight(thisNode, addItems, text, className);
67 | addItems.forEach((obj) =>
68 | obj.parent.insertAdjacentElement("beforebegin", obj.target)
69 | );
70 | };
71 |
72 | /**
73 | * Small JavaScript module for the documentation.
74 | */
75 | const SphinxHighlight = {
76 |
77 | /**
78 | * highlight the search words provided in localstorage in the text
79 | */
80 | highlightSearchWords: () => {
81 | if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight
82 |
83 | // get and clear terms from localstorage
84 | const url = new URL(window.location);
85 | const highlight =
86 | localStorage.getItem("sphinx_highlight_terms")
87 | || url.searchParams.get("highlight")
88 | || "";
89 | localStorage.removeItem("sphinx_highlight_terms")
90 | url.searchParams.delete("highlight");
91 | window.history.replaceState({}, "", url);
92 |
93 | // get individual terms from highlight string
94 | const terms = highlight.toLowerCase().split(/\s+/).filter(x => x);
95 | if (terms.length === 0) return; // nothing to do
96 |
97 | // There should never be more than one element matching "div.body"
98 | const divBody = document.querySelectorAll("div.body");
99 | const body = divBody.length ? divBody[0] : document.querySelector("body");
100 | window.setTimeout(() => {
101 | terms.forEach((term) => _highlightText(body, term, "highlighted"));
102 | }, 10);
103 |
104 | const searchBox = document.getElementById("searchbox");
105 | if (searchBox === null) return;
106 | searchBox.appendChild(
107 | document
108 | .createRange()
109 | .createContextualFragment(
 110 | '<p class="highlight-link">' +
 111 | '<a href="javascript:SphinxHighlight.hideSearchWords()">' +
 112 | _("Hide Search Matches") +
 113 | "</a></p>"
 114 | )
115 | );
116 | },
117 |
118 | /**
119 | * helper function to hide the search marks again
120 | */
121 | hideSearchWords: () => {
122 | document
123 | .querySelectorAll("#searchbox .highlight-link")
124 | .forEach((el) => el.remove());
125 | document
126 | .querySelectorAll("span.highlighted")
127 | .forEach((el) => el.classList.remove("highlighted"));
128 | localStorage.removeItem("sphinx_highlight_terms")
129 | },
130 |
131 | initEscapeListener: () => {
132 | // only install a listener if it is really needed
133 | if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return;
134 |
135 | document.addEventListener("keydown", (event) => {
136 | // bail for input elements
137 | if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return;
138 | // bail with special keys
139 | if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return;
140 | if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) {
141 | SphinxHighlight.hideSearchWords();
142 | event.preventDefault();
143 | }
144 | });
145 | },
146 | };
147 |
148 | _ready(() => {
149 | /* Do not call highlightSearchWords() when we are on the search page.
150 | * It will highlight words from the *previous* search query.
151 | */
152 | if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords();
153 | SphinxHighlight.initEscapeListener();
154 | });
155 |
--------------------------------------------------------------------------------
/docs/pages/html/objects.inv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/erdogant/undouble/735f548fbd7e9cd5ac4c9b891e5f685753727e6b/docs/pages/html/objects.inv
--------------------------------------------------------------------------------
/docs/pages/html/py-modindex.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
Python Module Index — undouble undouble documentation
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
112 |
113 |
117 |
118 |
119 |
120 |
121 |
122 |
123 | - Python Module Index
124 | -
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
Python Module Index
134 |
135 |
138 |
139 |
140 | | | |
141 | |
142 | u | |
143 |
144 |  |
146 |
147 | undouble |
148 | |
149 |
150 | |
151 |
152 | undouble.undouble |
153 | |
154 |
155 |
156 |
157 |
158 |
159 |
173 |
174 |
175 |
176 |
177 |
182 |
183 |
184 |
--------------------------------------------------------------------------------
/docs/pages/html/search.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
Search — undouble undouble documentation
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
112 |
113 |
117 |
118 |
119 |
120 |
121 |
122 |
123 | - Search
124 | -
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
161 |
162 |
163 |
164 |
165 |
170 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
--------------------------------------------------------------------------------
/docs/pages/html/sponsor.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
<no title> — undouble undouble documentation
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
110 |
111 |
115 |
116 |
117 |
118 |
128 |
129 |
130 |
131 |
132 |
Tip
133 |
You can support this project in various ways ❤️
134 |
135 | Become a Sponsor!
136 | Buy me a coffee! I ❤️ coffee :)
137 | Donate in Bitcoin.
138 | Follow me on Medium! Go to my medium profile and press follow.
139 | Subscribe to me on Substack! Go to my profile where you can subscribe.
140 | Star this repo at the github page.
141 | You can also buy a digital educational product over here.
142 | Other contributions can be in the form of feature requests, idea discussions, reporting bugs, opening pull requests.
143 |
144 |
Cheers Mate.
145 |
146 |
147 |
148 |
149 |
150 |
164 |
165 |
166 |
167 |
168 |
173 |
174 |
175 |
--------------------------------------------------------------------------------
/docs/source/Abstract.rst:
--------------------------------------------------------------------------------
1 | .. _code_directive:
2 |
3 | -------------------------------------
4 |
5 | Abstract
6 | ''''''''
7 |
8 | Background
  9 |     Searching for (near-)identical photos on your system can be a tedious effort because you need to click through your entire system,
 10 |     which likely contains thousands of photos, and then decide for each photo whether it is a "similar" one.
 11 |     The most straightforward approach to detect identical photos would be to compare file size or filename.
 12 |     However, photos are usually derived from different sources, such as mobile devices and social media apps, which leads to
 13 |     differences not only in file size and name but also in resolution, scaling, compression, and brightness.
14 |
15 | Aim
 16 |     Hash functions are ideal for detecting (near-)identical photos because of their robustness against minor changes.
 17 |     The aim of ``undouble`` is to detect (near-)identical images. It works using a multi-step process of carefully pre-processing the images,
 18 |     computing the image-hash, evaluating the hash-differences across all images, and grouping images with a similar or identical image-hash.
19 |
20 | Results
 21 |     ``undouble`` is a Python package to detect images with a similar or identical image-hash.
 22 |     There are many applications, such as digital forensics, copyright enforcement, and, more generically, disk space reduction and thus undoubling.
23 |
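A minimal sketch of this multi-step process (the same calls are shown in the Quickstart section):

.. code:: python

    # Import library
    from undouble import Undouble

    # Initialize with default settings
    model = Undouble()

    # Point to a folder with photos, e.g. the bundled example data
    targetdir = model.import_example(data='flowers')

    # Pre-process the images (grayscaling, scaling)
    model.import_data(targetdir)

    # Compute the image-hash per image
    model.compute_hash()

    # Group images whose hash-difference <= threshold
    model.group(threshold=0)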
24 |
25 | Schematic overview
26 | '''''''''''''''''''
27 |
 28 | The schematic overview of our approach is as follows:
29 |
30 | .. _schematic_overview:
31 |
32 | .. figure:: ../figs/schematic_overview.png
33 |
34 |
35 |
36 | .. include:: add_bottom.add
--------------------------------------------------------------------------------
/docs/source/Coding quality.rst:
--------------------------------------------------------------------------------
1 |
2 | Coding quality
3 | '''''''''''''''''''''
4 |
5 | I value software quality. Higher quality software has fewer defects, better security, and better performance, which leads to happier users who can work more effectively.
6 | Code reviews are an effective method for improving software quality. McConnell (2004) suggests that unit testing finds approximately 25% of defects, function testing 35%, integration testing 45%, and code review 55-60%.
7 | While this means that none of these methods are good enough on their own and that they should be combined, clearly code review is an essential tool here.
8 |
  9 | This library is therefore developed with several techniques, such as consistent code styling, low complexity, docstrings, reviews, and unit tests.
 10 | Such conventions are helpful to improve the quality, make the code cleaner and more understandable, but also to trace future bugs and spot syntax errors.
11 |
12 |
 13 | Library
14 | -------
15 |
16 | The file structure of the generated package looks like:
17 |
18 |
19 | .. code-block:: bash
20 |
21 | path/to/undouble/
22 | ├── .editorconfig
23 | ├── .gitignore
24 | ├── .pre-commit-config.yml
25 | ├── .prospector.yml
26 | ├── CHANGELOG.rst
27 | ├── docs
28 | │ ├── conf.py
29 | │ ├── index.rst
30 | │ └── ...
31 | ├── LICENSE
32 | ├── MANIFEST.in
33 | ├── NOTICE
34 | ├── undouble
35 | │ ├── __init__.py
36 | │ ├── __version__.py
37 | │ └── undouble.py
38 | ├── README.md
39 | ├── requirements.txt
40 | ├── setup.cfg
41 | ├── setup.py
42 | └── tests
43 | ├── __init__.py
44 | └── test_undouble.py
45 |
46 |
47 | Style
48 | -----
49 |
50 | This library is compliant with the PEP-8 standards.
51 | PEP stands for Python Enhancement Proposal and sets a baseline for the readability of Python code.
52 | Each public function contains a docstring that is based on numpy standards.
53 |
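As an illustration, a numpy-style docstring has roughly the following layout (a generic sketch, not copied from the library):

.. code-block:: python

    def compute_something(X, threshold=0):
        """Short one-line summary of the function.

        Parameters
        ----------
        X : array-like
            Description of the input data.
        threshold : int, (default: 0)
            Description of the parameter.

        Returns
        -------
        results : dict
            Description of the returned results.
        """
        return {'threshold': threshold}
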
54 |
55 | Complexity
56 | ----------
57 |
 58 | This library has been developed using measures that help decrease technical debt.
 59 | Version 0.1.0 of the ``undouble`` library scored, according to the code analyzer: **VALUE**, for which values > 0 are good and 10 is the maximum score.
 60 | Developing software with low(er) technical debt may take extra development time, but has many advantages:
61 |
62 | * Higher quality code
 63 | * Easier to maintain
64 | * Less prone to bugs and errors
65 | * Higher security
66 |
67 |
68 | Unit tests
69 | ----------
70 |
 71 | The use of unit tests is essential to guarantee consistent output of the developed functions.
72 | The following tests are secured using :func:`tests.test_undouble`:
73 |
 74 | * The input is checked.
 75 | * The output values are checked and whether they are encoded properly.
 76 | * Whether parameters are handled correctly.
77 |
78 |
79 | .. code-block:: bash
80 |
81 | pytest tests\test_undouble.py
82 |
83 | ====================================== test session starts ======================================
84 | platform win32 -- Python 3.6.10, pytest-5.4.0, py-1.8.1, pluggy-0.13.1
85 | collected 3 items
86 |
87 | tests\test_undouble.py ... [100%]
88 |
89 | ======================================= warnings summary ========================================
90 | tests/test_undouble.py::test_plot
91 |
92 | =========================== 3 passed, 1 warning in 1254.97s (0:20:54) ===========================
93 |
94 |
95 |
96 |
97 | .. include:: add_bottom.add
--------------------------------------------------------------------------------
/docs/source/Documentation.rst:
--------------------------------------------------------------------------------
1 | Sponsor
2 | ############
3 |
4 | .. include:: sponsor.rst
5 |
6 |
7 | Medium Blog
8 | ############
9 |
10 | .. note::
11 | `Read more details and the usage in the Medium Blog: Detection of Duplicate Images Using Image Hash Functions
`_
12 |
13 | Github
14 | ############
15 |
16 | .. note::
17 | `Source code of undouble can be found at Github `_
18 |
19 |
20 |
21 | Citing
22 | #########
23 |
24 | .. note::
 25 | Bibtex can be found on the right side of the `github page `_.
26 |
27 |
28 |
29 |
30 | .. include:: add_bottom.add
--------------------------------------------------------------------------------
/docs/source/Installation.rst:
--------------------------------------------------------------------------------
1 | .. _code_directive:
2 |
3 | -------------------------------------
4 |
5 | Installation
6 | ''''''''''''
7 |
8 | Create environment
9 | ------------------
10 |
11 |
 12 | If desired, install ``undouble`` in an isolated Python environment using conda:
13 |
 14 | .. code-block:: console
15 |
16 | conda create -n env_undouble python=3.8
17 | conda activate env_undouble
18 |
19 |
20 | Install via ``pip``:
21 |
22 | .. code-block:: console
23 |
 24 |     # Install from pypi:
25 | pip install undouble
26 |
27 | # Install directly from github
28 | pip install git+https://github.com/erdogant/undouble
29 |
30 |
31 | Uninstalling
32 | ''''''''''''
33 |
 34 | If you want to remove your ``undouble`` installation together with its environment, it can be done as follows:
35 |
36 | .. code-block:: console
37 |
38 | # List all the active environments. undouble should be listed.
39 | conda env list
40 |
41 | # Remove the undouble environment
42 | conda env remove --name undouble
43 |
44 | # List all the active environments. undouble should be absent.
45 | conda env list
46 |
47 |
48 | Quickstart
49 | ''''''''''
50 |
 51 | A quick example of how to detect and group (near-)identical images in a dataset.
52 |
53 |
54 | .. code:: python
55 |
56 | # Import library
57 | from undouble import Undouble
58 |
59 | # Init with default settings
60 | model = Undouble()
61 |
62 | # Import example data
63 | targetdir = model.import_example(data='flowers')
64 |
 65 |     # Import the files from disk, clean and pre-process them
66 | model.import_data(targetdir)
67 |
68 | # Compute image-hash
69 | model.compute_hash()
70 |
71 | # Group images with image-hash <= threshold
72 | model.group(threshold=0)
73 |
74 | # Plot the images
75 | model.plot()
76 |
77 | # Move the images
78 | model.move_to_dir()
79 |
80 |
81 |
82 |
83 | .. include:: add_bottom.add
--------------------------------------------------------------------------------
/docs/source/_static/css/carbon_ads_hor.css:
--------------------------------------------------------------------------------
1 | body {
2 | background-color: #000;
3 | }
4 |
5 | #carbonads * {
6 | margin: initial;
7 | padding: initial;
8 | }
9 |
10 | #carbonads {
11 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto,
12 | Oxygen-Sans, Ubuntu, Cantarell, 'Helvetica Neue', Helvetica, Arial,
13 | sans-serif;
14 | }
15 |
16 | #carbonads {
17 | display: flex;
18 | max-width: 330px;
19 | background-color: hsl(0, 0%, 10%);
20 | box-shadow: 0 0 1px hsla(0, 0%, 0%, 0.5);
21 | }
22 |
23 | #carbonads a {
24 | color: #ddd;
25 | text-decoration: none;
26 | }
27 |
28 | #carbonads a:hover {
29 | color: #ddd;
30 | }
31 |
32 | #carbonads span {
33 | position: relative;
34 | display: block;
35 | overflow: hidden;
36 | }
37 |
38 | #carbonads .carbon-wrap {
39 | display: flex;
40 | }
41 |
42 | #carbonads .carbon-img {
43 | display: block;
44 | margin: 0;
45 | line-height: 1;
46 | }
47 |
48 | #carbonads .carbon-img img {
49 | display: block;
50 | }
51 |
52 | #carbonads .carbon-text {
53 | padding: 10px;
54 | text-align: left;
55 | font-size: 13px;
56 | margin-bottom: 16px;
57 | line-height: 1.5;
58 | }
59 |
60 | #carbonads .carbon-poweredby {
61 | display: block;
62 | padding: 6px 8px;
63 | background: #1e2021;
64 | color: #aaa;
65 | text-align: center;
66 | text-transform: uppercase;
67 | letter-spacing: 0.5px;
68 | font-weight: 600;
69 | font-size: 8px;
70 | line-height: 1;
71 | position: absolute;
72 | bottom: 0;
73 | right: 0;
74 | border-top-left-radius: 6px;
75 | }
76 |
--------------------------------------------------------------------------------
/docs/source/_static/css/carbon_ads_ver.css:
--------------------------------------------------------------------------------
1 | body {
2 | background: #000;
3 | }
4 |
5 | #carbonads {
6 | display: block;
7 | overflow: hidden;
8 | padding: 1em;
9 | max-width: 130px;
10 | background: hsl(0, 0%, 10%);
11 | box-shadow: 0 0 1px hsla(0, 0%, 0%, 0.5);
12 | text-align: center;
13 | font-size: 12px;
14 | font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Oxygen-Sans, Ubuntu, Cantarell, "Helvetica Neue", sans-serif;
15 | line-height: 1.5;
16 | }
17 |
18 | #carbonads a {
19 | color: #eee;
20 | text-decoration: none;
21 | }
22 |
23 | #carbonads a:hover {
24 | color: inherit;
25 | }
26 |
27 | #carbonads span {
28 | display: block;
29 | overflow: hidden;
30 | }
31 |
32 | .carbon-img {
33 | display: block;
34 | margin: 0 auto 8px;
35 | line-height: 1;
36 | }
37 |
38 | .carbon-text {
39 | display: block;
40 | margin-bottom: 8px;
41 | }
42 |
43 | .carbon-poweredby {
44 | display: block;
45 | padding: 6px 10px;
46 | background: #1e2021;
47 | text-transform: uppercase;
48 | letter-spacing: 0.5px;
49 | font-size: 7px;
50 | line-height: 1;
51 | }
52 |
53 |
--------------------------------------------------------------------------------
/docs/source/_static/css/custom.css:
--------------------------------------------------------------------------------
1 | body {
2 | background-color: #111;
3 | }
4 |
5 | #carbonads * {
6 | margin: initial;
7 | padding: initial;
8 | }
9 |
10 | #carbonads {
11 | font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto,
12 | Oxygen-Sans, Ubuntu, Cantarell, 'Helvetica Neue', Helvetica, Arial,
13 | sans-serif;
14 | }
15 |
16 | #carbonads {
17 | display: flex;
18 | max-width: 330px;
19 | background-color: hsl(0, 0%, 10%);
20 | box-shadow: 0 0 1px hsla(0, 0%, 0%, 0.5);
21 | }
22 |
23 | #carbonads a {
24 | color: #ddd;
25 | text-decoration: none;
26 | }
27 |
28 | #carbonads a:hover {
29 | color: #ddd;
30 | }
31 |
32 | #carbonads span {
33 | position: relative;
34 | display: block;
35 | overflow: hidden;
36 | }
37 |
38 | #carbonads .carbon-wrap {
39 | display: flex;
40 | }
41 |
42 | #carbonads .carbon-img {
43 | display: block;
44 | margin: 0;
45 | line-height: 1;
46 | }
47 |
48 | #carbonads .carbon-img img {
49 | display: block;
50 | }
51 |
52 | #carbonads .carbon-text {
53 | padding: 10px;
54 | text-align: left;
55 | font-size: 13px;
56 | margin-bottom: 16px;
57 | line-height: 1.5;
58 | }
59 |
60 | #carbonads .carbon-poweredby {
61 | display: block;
62 | padding: 6px 8px;
63 | background: #1e2021;
64 | color: #aaa;
65 | text-align: center;
66 | text-transform: uppercase;
67 | letter-spacing: 0.5px;
68 | font-weight: 600;
69 | font-size: 8px;
70 | line-height: 1;
71 | position: absolute;
72 | bottom: 0;
73 | right: 0;
74 | border-top-left-radius: 6px;
75 | }
76 |
--------------------------------------------------------------------------------
/docs/source/_static/css/custom_rectangle.css:
--------------------------------------------------------------------------------
1 | body {
2 | background-color: #111;
3 | }
4 |
5 | #carbonads {
6 | display: flex;
7 | justify-content: center;
8 | align-items: center;
9 | position: relative;
10 |
11 | font-family: -apple-system, BlinkMacSystemFont, Segoe UI, Roboto, Oxygen-Sans, Ubuntu, Cantarell, Helvetica Neue, Helvetica, Arial, sans-serif;
12 | width: 275px;
13 | height: 215px;
14 | text-align: center;
15 | background-color: hsl(0, 0%, 10%);
16 | box-shadow: 0 0 1px hsla(0, 0%, 0%, 0.5);
17 | }
18 |
19 | #carbonads a {
20 | color: #eee;
21 | text-decoration: none;
22 | }
23 |
24 | #carbonads a:hover {
25 | color: #eee;
26 | }
27 |
28 |
29 | #carbonads>span {
30 | display: block;
31 | position: relative;
32 | overflow: hidden;
33 | padding: 15px;
34 |
35 | border-radius: 4px;
36 | background-color: #1e2021;
37 | box-shadow:
38 | 0 0 1px 1px hsla(0, 0%, 0%, .05),
39 | 0 0 2px 2px hsla(0, 0%, 0%, .05),
40 | 0 0 4px 4px hsla(0, 0%, 0%, .05);
41 | }
42 |
43 |
44 | .carbon-img {
45 | display: block;
46 | margin: 0 0 8px;
47 |
48 | line-height: 1;
49 | }
50 |
51 | .carbon-img img {
52 | width: 150px;
53 | max-width: 150px !important;
54 | height: auto;
55 | }
56 |
57 | .carbon-text {
58 | display: block;
59 | margin-bottom: 8px;
60 | padding: 0 10px;
61 | font-size: 13px;
62 | font-weight: 500;
63 | line-height: 1.35;
64 | font-weight: normal;
65 | }
66 |
67 | .carbon-poweredby {
68 | display: block;
69 | position: absolute;
70 | right: 0;
71 | bottom: 0;
72 | left: 0;
73 | padding: 5px 6px;
74 | font-size: 8px;
75 | line-height: 1;
76 | letter-spacing: .5px;
77 | text-transform: uppercase;
78 | color: #aaa;
79 | background-color: hsl(0, 0%, 10%);
80 | }
81 |
--------------------------------------------------------------------------------
/docs/source/_templates/carbon_ads.html:
--------------------------------------------------------------------------------
1 | .. raw:: html
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/docs/source/_templates/layout.html:
--------------------------------------------------------------------------------
1 | {% extends "!layout.html" %}
2 |
3 | {% block menu %}
4 | {{ super() }}
5 | Index
6 | {% endblock %}
7 |
8 |
--------------------------------------------------------------------------------
/docs/source/add_bottom.add:
--------------------------------------------------------------------------------
1 | .. raw:: html
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/docs/source/add_carbon.add:
--------------------------------------------------------------------------------
1 | .. raw:: html
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/docs/source/add_top.add:
--------------------------------------------------------------------------------
1 | .. raw:: html
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 | sys.path.insert(0, os.path.abspath('../../'))
16 | import undouble
17 |
18 | currpath = os.path.dirname(__file__)
19 | sys.path.insert(0, os.path.abspath('./'))
20 | from helper import *
21 |
22 | ########################################################################################
23 | # -- Download rst file -----------------------------------------------------
24 | download_file('https://erdogant.github.io/docs/rst/sponsor.rst', "sponsor.rst")
25 | download_file('https://erdogant.github.io/docs/rst/add_carbon.add', "add_carbon.add")
26 | download_file('https://erdogant.github.io/docs/rst/add_top.add', "add_top.add")
27 | download_file('https://erdogant.github.io/docs/rst/add_bottom.add', "add_bottom.add")
28 | ########################################################################################
29 | add_includes_to_rst_files(top=False, bottom=True)
30 | ########################################################################################
31 | # Import PDF from directory in rst files
32 | # embed_in_rst(currpath, 'pdf', '.pdf', "Additional Information", 'Additional_Information.rst')
33 | ########################################################################################
34 | # Import notebooks in HTML format
35 | # convert_ipynb_to_html(currpath, 'notebooks', '.ipynb')
36 | # embed_in_rst(currpath, 'notebooks', '.html', "Notebook", 'notebook.rst')
37 | ########################################################################################
38 |
39 |
40 | # -- Project information -----------------------------------------------------
41 |
42 | project = 'undouble'
43 | copyright = '2020, Erdogan Taskesen'
44 | author = 'Erdogan Taskesen'
45 |
46 | # The master toctree document.
47 | master_doc = 'index'
48 |
49 | # The full version, including alpha/beta/rc tags
50 | release = 'undouble'
51 | version = str(undouble.__version__)
52 |
53 | # -- General configuration ---------------------------------------------------
54 |
55 | # Add any Sphinx extension module names here, as strings. They can be
56 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
57 | # ones.
58 | extensions = [
59 | "sphinx.ext.autodoc",
60 | "sphinx.ext.napoleon",
61 | "sphinx.ext.intersphinx",
62 | "sphinx.ext.autosectionlabel",
63 | "rst2pdf.pdfbuilder",
64 | ]
65 |
66 | napoleon_google_docstring = False
67 | napoleon_numpy_docstring = True
68 |
69 | # autodoc_mock_imports = ['cv2','keras']
70 |
71 |
72 | pdf_documents = [('index', u'undouble', u'undouble', u'Erdogan Taskesen'),]
73 |
74 | # Add any paths that contain templates here, relative to this directory.
75 | templates_path = ['_templates']
76 |
77 | # List of patterns, relative to source directory, that match files and
78 | # directories to ignore when looking for source files.
79 | # This pattern also affects html_static_path and html_extra_path.
80 | exclude_patterns = ["_build"]
81 |
82 |
83 | # The name of the Pygments (syntax highlighting) style to use.
84 | pygments_style = "sphinx"
85 |
86 | # -- Options for HTML output -------------------------------------------------
87 |
88 | # The theme to use for HTML and HTML Help pages. See the documentation for
89 | # a list of builtin themes.
90 | #
91 | # html_theme = 'alabaster'
92 | # html_theme = 'default'
93 | html_theme = 'sphinx_rtd_theme'
94 |
95 |
96 | # Add any paths that contain custom static files (such as style sheets) here,
97 | # relative to this directory. They are copied after the builtin static files,
98 | # so a file named "default.css" will overwrite the builtin "default.css".
99 | html_static_path = ['_static']
100 |
101 | # These paths are either relative to html_static_path
102 | # or fully qualified paths (eg. https://...)
103 | html_css_files = ['css/custom.css',]
104 |
105 | # html_sidebars = { '**': ['globaltoc.html', 'relations.html', 'carbon_ads.html', 'sourcelink.html', 'searchbox.html'] }
106 |
107 |
108 |
--------------------------------------------------------------------------------
/docs/source/hash_functions.rst:
--------------------------------------------------------------------------------
1 | Average hash
2 | ========================
3 |
  4 | After the decolorizing and scaling step, each pixel block is compared to the average (as the name suggests) of all pixel values of the image. In the example below, we will generate a 64-bit hash, which means that the image is scaled to 8×8 pixels. If the value in the pixel block is larger than the average, it gets value 1 (white) and otherwise a 0 (black). The final image hash is obtained by flattening the array into a vector.
5 |
6 |
7 | .. code:: python
8 |
9 | # Initialize with hash
10 | model = Undouble(method='ahash')
11 |
12 | # Import example
13 | X = model.import_example(data='cat_and_dog')
14 | imgs = model.import_data(X, return_results=True)
15 |
16 | # Compute hash for a single image
17 | hashs = model.compute_imghash(imgs['img'][0], to_array=False, hash_size=8)
18 |
 19 |     # The hash is a binary array or vector.
20 | print(hashs)
21 |
22 | # Plot the image using the undouble plot_hash functionality
23 | model.results['img_hash_bin']
24 | model.plot_hash(idx=0)
25 |
26 | # Plot the image manually
27 | fig, ax = plt.subplots(1, 2, figsize=(8,8))
28 | ax[0].imshow(imgs['img'][0])
29 | ax[1].imshow(hashs[0])
30 |
31 |
32 | .. |ahash| image:: ../figs/ahash.png
33 |
34 | .. table:: Average hash
35 | :align: center
36 |
37 | +----------+
38 | | |ahash| |
39 | +----------+
40 |
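For illustration only, the averaging step itself can be sketched with plain NumPy; the ``img_gray`` array below is a hypothetical stand-in for the decolorized image after scaling to 8x8 pixels:

.. code:: python

    import numpy as np

    # Hypothetical 8x8 grayscaled image (values 0-255)
    img_gray = np.random.randint(0, 256, size=(8, 8))

    # Compare each pixel block to the overall average: 1 (white) if larger, otherwise 0 (black)
    bits = (img_gray > img_gray.mean()).astype(int)

    # Flatten the 8x8 binary array into the final 64-bit hash vector
    ahash = bits.ravel()
    print(ahash)
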
41 |
42 | Perceptual hash
43 | ========================
44 |
 45 | After the first step of decolorizing, a Discrete Cosine Transform (DCT) is applied; first per row and afterward per column. The DCT output is then cropped to the top-left 8 x 8 block, which contains the lowest frequencies. Each value in this block is compared to the median; if it is larger than the median, it gets value 1 and otherwise a 0. The final image hash is obtained by flattening the array into a vector.
46 |
47 | .. code:: python
48 |
49 | # Initialize with hash
50 | model = Undouble(method='phash')
51 |
52 | # Import example
53 | X = model.import_example(data='cat_and_dog')
54 | imgs = model.import_data(X, return_results=True)
55 |
56 | # Compute hash for a single image
57 | hashs = model.compute_imghash(imgs['img'][0], to_array=False, hash_size=8)
58 |
 59 |     # The hash is a binary array or vector.
60 | print(hashs)
61 |
62 | # Plot the image using the undouble plot_hash functionality
63 | model.results['img_hash_bin']
64 | model.plot_hash(idx=0)
65 |
66 | # Plot the image manually
67 | fig, ax = plt.subplots(1, 2, figsize=(8,8))
68 | ax[0].imshow(imgs['img'][0])
69 | ax[1].imshow(hashs[0])
70 |
71 | .. |phash| image:: ../figs/phash.png
72 |
73 | .. table:: Perceptual hash
74 | :align: center
75 |
76 | +----------+
77 | | |phash| |
78 | +----------+
79 |
80 |
81 | Differential hash
82 | ========================
83 |
 84 | After the first step of decolorizing and scaling, the pixels are serially (from left to right per row) compared to their neighbor to the right. If the byte at position x is less than the byte at position (x+1), it gets value 1 and otherwise a 0. The final image hash is obtained by flattening the array into a vector.
85 |
86 | .. code:: python
87 |
88 | # Initialize with hash
89 | model = Undouble(method='dhash')
90 |
91 | # Import example
92 | X = model.import_example(data='cat_and_dog')
93 | imgs = model.import_data(X, return_results=True)
94 |
95 | # Compute hash for a single image
96 | hashs = model.compute_imghash(imgs['img'][0], to_array=False, hash_size=8)
97 |
 98 |     # The hash is a binary array or vector.
99 | print(hashs)
100 |
101 | # Plot the image using the undouble plot_hash functionality
102 | model.results['img_hash_bin']
103 | model.plot_hash(idx=0)
104 |
105 | # Plot the image manually
106 | fig, ax = plt.subplots(1, 2, figsize=(8,8))
107 | ax[0].imshow(imgs['img'][0])
108 | ax[1].imshow(hashs[0])
109 |
110 |
111 |
112 | .. |dhash| image:: ../figs/dhash.png
113 |
114 | .. table:: Differential hash
115 | :align: center
116 |
117 | +----------+
118 | | |dhash| |
119 | +----------+
120 |
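For illustration only, the neighbor comparison can be sketched with plain NumPy; the ``img_gray`` array is a hypothetical stand-in for the decolorized image, scaled here to 8x9 pixels so that eight comparisons per row remain:

.. code:: python

    import numpy as np

    # Hypothetical 8x9 grayscaled image (values 0-255)
    img_gray = np.random.randint(0, 256, size=(8, 9))

    # Compare each pixel to its right-hand neighbor: 1 if smaller, otherwise 0
    bits = (img_gray[:, :-1] < img_gray[:, 1:]).astype(int)

    # Flatten the 8x8 binary result into the final 64-bit hash vector
    dhash = bits.ravel()
    print(dhash)
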
121 |
122 | Haar wavelet hash
123 | ========================
124 |
 125 | After the first step of decolorizing and scaling, a two-dimensional wavelet transform is applied to the image. Each pixel block is then compared to the median of all gray values of the image. If the value in the pixel block is larger than the median, it gets value 1 and otherwise a 0. The final image hash is obtained by flattening the array into a vector.
126 |
127 | .. code:: python
128 |
129 | # Initialize with hash
130 | model = Undouble(method='whash-haar')
131 |
132 | # Import example
133 | X = model.import_example(data='cat_and_dog')
134 | imgs = model.import_data(X, return_results=True)
135 |
136 | # Compute hash for a single image
137 | hashs = model.compute_imghash(imgs['img'][0], to_array=False, hash_size=8)
138 |
 139 |     # The hash is a binary array or vector.
140 | print(hashs)
141 |
142 | # Plot the image using the undouble plot_hash functionality
143 | model.results['img_hash_bin']
144 | model.plot_hash(idx=0)
145 |
146 | # Plot the image manually
147 | fig, ax = plt.subplots(1, 2, figsize=(8,8))
148 | ax[0].imshow(imgs['img'][0])
149 | ax[1].imshow(hashs[0])
150 |
151 |
152 | .. |whash| image:: ../figs/whash.png
153 |
154 | .. table:: Haar wavelet hash
155 | :align: center
156 |
157 | +----------+
158 | | |whash| |
159 | +----------+
160 |
161 |
162 | Crop-resistant hash
163 | ========================
164 |
165 | The Crop resistant hash is implemented as described in the paper "Efficient Cropping-Resistant Robust Image Hashing". DOI 10.1109/ARES.2014.85. This algorithm partitions the image into bright and dark segments, using a watershed-like algorithm, and then does an image hash on each segment. This makes the image much more resistant to cropping than other algorithms, with the paper claiming resistance to up to 50% cropping, while most other algorithms stop at about 5% cropping.
166 |
167 |
168 | .. code:: python
169 |
170 | # Import library
171 | from undouble import Undouble
172 |
173 | # Init with default settings
174 | model = Undouble()
175 |
176 | # Import example data
177 | targetdir = model.import_example(data='flowers')
178 |
 179 |     # Import the files from disk, clean and pre-process them
180 | model.import_data(targetdir)
181 |
182 | # Compute image-hash
183 | model.compute_hash(method='crop-resistant-hash')
184 |
185 | # Find images with image-hash <= threshold
186 | results = model.group(threshold=5)
187 |
188 | # Plot the images
189 | model.plot()
190 |
191 | # Print the output for demonstration
192 | print(model.results.keys())
193 |
194 | # The detected groups
195 | model.results['select_pathnames']
196 | model.results['select_scores']
197 | model.results['select_idx']
198 |
199 | # Plot the hash for the first group
200 | model.plot_hash(filenames=model.results['filenames'][model.results['select_idx'][0]])
201 |
202 |
203 | Plot image hash
204 | ========================
205 |
 206 | All examples are created using the code below:
207 |
208 | .. code:: python
209 |
210 | # pip install imagesc
211 |
212 | import cv2
213 | from scipy.spatial import distance
214 | import numpy as np
215 | import matplotlib.pyplot as plt
216 | from imagesc import imagesc
217 | from undouble import Undouble
218 |
219 | methods = ['ahash', 'dhash', 'whash-haar']
220 |
221 | for method in methods:
222 | # Average Hash
223 | model = Undouble(method=method, hash_size=8)
224 | # Import example data
225 | targetdir = model.import_example(data='cat_and_dog')
226 | # Grayscaling and scaling
227 | model.import_data(targetdir)
 228 |         # Compute the image-hash for only the first image.
 229 |         hashs = model.compute_imghash(model.results['img'][0], to_array=True)
 230 |         # Convert the binary hash into a string and print it
231 | print(method + ' Hash:')
232 | image_hash = ''.join(hashs[0].astype(int).astype(str).ravel())
233 | print(image_hash)
234 |
235 | # Import image for plotting purposes
236 | img_g = cv2.imread(model.results['pathnames'][0], cv2.IMREAD_GRAYSCALE)
237 | img_r = cv2.resize(img_g, (8, 8), interpolation=cv2.INTER_AREA)
238 |
239 | # Make the figure
240 | fig, ax = plt.subplots(2, 2, figsize=(15, 10))
241 | ax[0][0].imshow(model.results['img'][0][..., ::-1])
242 | ax[0][0].axis('off')
243 | ax[0][0].set_title('Source image')
244 | ax[0][1].imshow(img_g, cmap='gray')
245 | ax[0][1].axis('off')
246 | ax[0][1].set_title('grayscale image')
247 | ax[1][0].imshow(img_r, cmap='gray')
248 | ax[1][0].axis('off')
249 | ax[1][0].set_title('grayscale image, size %.0dx%.0d' %(8, 8))
250 | ax[1][1].imshow(hashs[0], cmap='gray')
251 | ax[1][1].axis('off')
252 | ax[1][1].set_title(method + ' function')
253 |
254 | # Compute image hash for the 10 images.
255 | hashs = model.compute_imghash(model, to_array=False)
256 |
257 | # Compute number of differences across all images.
258 | adjmat = np.zeros((hashs.shape[0], hashs.shape[0]))
259 | for i, h1 in enumerate(hashs):
260 | for j, h2 in enumerate(hashs):
261 | adjmat[i, j] = np.sum(h1!=h2)
262 |
263 | # Compute the average image-hash difference.
264 | diff = np.mean(adjmat[np.triu_indices(adjmat.shape[0], k=1)])
265 | print('[%s] Average difference: %.2f' %(method, diff))
266 |
267 | # Make a heatmap to demonstrate the differences between the image-hashes
268 | imagesc.plot(hashs, cmap='gray', col_labels='', row_labels=model.results['filenames'], cbar=False, title=method + '\nAverage difference: %.3f' %(diff), annot=True)
269 |
270 |
271 | .. include:: add_bottom.add
--------------------------------------------------------------------------------
/docs/source/helper.py:
--------------------------------------------------------------------------------
1 | import os
2 | from glob import glob
3 | import numpy as np
4 |
5 | # %% Download rst file
6 | def download_file(url_rst, filename):
7 | try:
8 | from urllib.request import urlretrieve
9 | if os.path.isfile(filename):
10 | os.remove(filename)
11 | print('Download %s..' %(filename))
12 | urlretrieve(url_rst, filename)
13 | except:
14 | print('Downloading %s failed.' %(url_rst))
15 |
16 | # %% Include ADD to rst files
17 | def add_includes_to_rst_files(top=True, bottom=True):
18 | skipfiles = ['sponsor.rst']
19 | for file_path in glob("*.rst"):
20 | if not np.isin(file_path, skipfiles):
21 | with open(file_path, "r+", encoding="utf8") as file:
22 | contents = file.read()
23 | if top and ".. include:: add_top.add" not in contents:
24 | file.seek(0)
25 | file.write(".. include:: add_top.add\n\n" + contents)
26 | print('Top Add included >%s' %(file_path))
27 | elif (not top) and ".. include:: add_top.add" in contents:
28 | contents = contents.replace(".. include:: add_top.add\n\n", "")
29 | print('Remove Top Add>%s' %(file_path))
30 | file.seek(0)
31 | file.truncate()
32 | file.write(contents)
33 |
34 | if bottom and ".. include:: add_bottom.add" not in contents:
35 | file.seek(0, 2)
36 | file.write("\n\n.. include:: add_bottom.add")
37 | print('Bottom Add included >%s' %(file_path))
38 | elif (not bottom) and ".. include:: add_bottom.add" in contents:
39 | contents = contents.replace(".. include:: add_bottom.add\n\n", "")
40 | print('Remove Bottom Add>%s' %(file_path))
41 | file.seek(0)
42 | file.truncate()
43 | file.write(contents)
44 |
45 | # %% ADD TO REST
46 | def adds_in_rst(filehandle):
47 | # Write carbon adds
48 | filehandle.write("\n\n.. raw:: html\n")
49 | filehandle.write("\n
")
50 | filehandle.write("\n ")
51 | filehandle.write('\n ')
52 | filehandle.write("\n ")
53 | filehandle.write("\n
")
54 |
55 | # %% SCAN DIRECTORY
56 | def scan_directory(currpath, directory, ext):
 57 |     # Select files with the given extension
58 | path_to_files = os.path.join(currpath, '_static', directory)
59 | files_in_dir = np.array(os.listdir(path_to_files))
60 | Iloc = np.array(list(map(lambda x: x[-len(ext):]==ext, files_in_dir)))
61 | return files_in_dir[Iloc]
62 |
63 | # %% EMBED PDF IN RST
64 | def embed_in_rst(currpath, directory, ext, title, file_rst):
65 |
66 | try:
 67 |         # Select files with the given extension
68 | files_in_dir = scan_directory(currpath, directory, ext)
69 | print('---------------------------------------------------------------')
70 | print('[%s] embedding in RST from directory: [%s]' %(ext, directory))
71 |
72 | # Open file
73 | filehandle = open(file_rst, 'w')
74 | filehandle.write(".. _code_directive:\n\n" + title + "\n#######################\n\n")
75 |
76 | # 3. simple concat op
77 | for fname in files_in_dir:
78 | print('[%s] processed in rst' %(fname))
79 | title = "**" + fname[:-len(ext)] + "**\n"
80 | if ext=='.pdf':
81 | newstr = ":pdfembed:`src:_static/" + directory + "/" + fname + ", height:600, width:700, align:middle`"
82 | elif ext=='.html':
83 | newstr = ".. raw:: html\n\n" + ' '
84 | write_to_rst = title + "\n" + newstr + "\n\n\n\n"
85 | # Write to rst
86 | filehandle.write(write_to_rst)
87 |
 88 |         # Write the ADD includes to the RST file
89 | adds_in_rst(filehandle)
90 | # Close file
91 | filehandle.close()
92 | except:
93 | print('ERROR IN EMBEDDING IT IN RST.')
94 |
95 | # %% CONVERT NOTEBOOKS TO HTML
96 | def convert_ipynb_to_html(currpath, directory, ext):
97 | try:
98 | # Uitlezen op extensie
99 | files_in_dir = scan_directory(currpath, directory, ext)
100 | # 3. simple concat op
101 | for fname in files_in_dir:
102 | path_to_file = os.path.join('_static/', directory, fname)
103 | print('[%s] converting to HTML' %(path_to_file))
104 | os.system('jupyter nbconvert --to html ' + path_to_file)
105 | except:
106 | print('ERROR IN CONVERTING NOTEBOOK TO HTML.')
107 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | undouble's documentation!
2 | =========================
3 |
4 | |python| |pypi| |docs| |stars| |LOC| |downloads_month| |downloads_total| |license| |forks| |open issues| |project status| |medium| |colab| |repo-size| |donate|
5 |
6 |
7 | .. _schematic_overview:
8 |
9 | .. figure:: ../figs/schematic_overview.png
10 |
11 |
12 | -----------------------------------
13 |
14 |
15 | The aim of the ``undouble`` library is to detect (near-)identical images across an entire system or directory.
16 | It works using a multi-step process: pre-processing the images (grayscaling, normalizing, and scaling), computing the image hash, and grouping the images based on a threshold value. A minimal quickstart is shown after the list below.
17 |
18 | * 1. Detect images with an identical image-hash in a specified folder or across your entire system.
19 | * 2. Use the threshold to detect near-identical images, such as photo-bursts.
20 | * 3. Plot the groupings for inspection.
21 | * 4. Systematically undouble (move) the grouped images.
22 |
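A minimal quickstart is sketched below using the bundled ``flowers`` example dataset; the hash settings shown (``method='phash'``, ``hash_size=8``) are simply the values used throughout the examples and can be changed.

.. code-block:: python

    # Import library
    from undouble import Undouble

    # Initialize with an image-hash method
    model = Undouble(method='phash', hash_size=8)

    # Import the example data and pre-process the images
    targetdir = model.import_example(data='flowers')
    model.import_data(targetdir)

    # Compute the image-hash per image
    model.compute_hash()

    # Group images with an identical image-hash (threshold=0)
    model.group(threshold=0)

    # Plot the groupings
    model.plot()
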
23 | .. tip::
24 | `Read more details and usage examples in the Medium blog: Detection of Duplicate Images Using Image Hash Functions `_
25 |
26 |
27 |
28 | -----------------------------------
29 |
30 | .. note::
31 | **Your ❤️ is important to keep this package maintained.** You can `support `_ this project in various ways; have a look at the `sponsor page `_.
32 | Report bugs, issues, and feature requests on the `github `_ page.
33 |
34 | .. code-block:: console
35 |
36 | pip install undouble
37 |
38 | -----------------------------------
39 |
40 |
41 |
42 | Content
43 | =======
44 |
45 | .. toctree::
46 | :maxdepth: 1
47 | :caption: Background
48 |
49 | Abstract
50 |
51 |
52 | .. toctree::
53 | :maxdepth: 1
54 | :caption: Installation
55 |
56 | Installation
57 |
58 |
59 | .. toctree::
60 | :maxdepth: 1
61 | :caption: Input/ Output
62 |
63 | input_output
64 |
65 |
66 | .. toctree::
67 | :maxdepth: 1
68 | :caption: Core functionalities
69 |
70 | core_functions
71 |
72 |
73 | .. toctree::
74 | :maxdepth: 1
75 | :caption: Hash functions
76 |
77 | hash_functions
78 |
79 |
80 | .. toctree::
81 | :maxdepth: 1
82 | :caption: Examples
83 |
84 | Examples
85 |
86 |
87 | .. toctree::
88 | :maxdepth: 1
89 | :caption: Documentation
90 |
91 | Documentation
92 | Coding quality
93 | undouble.undouble
94 |
95 |
96 |
97 | Indices and tables
98 | ==================
99 |
100 | * :ref:`genindex`
101 | * :ref:`modindex`
102 | * :ref:`search`
103 |
104 |
105 | .. |python| image:: https://img.shields.io/pypi/pyversions/undouble.svg
106 | :alt: Python
107 | :target: https://erdogant.github.io/undouble/
108 |
109 | .. |pypi| image:: https://img.shields.io/pypi/v/undouble.svg
110 | :alt: PyPI version
111 | :target: https://pypi.org/project/undouble/
112 |
113 | .. |docs| image:: https://img.shields.io/badge/Sphinx-Docs-blue.svg
114 | :alt: Sphinx documentation
115 | :target: https://erdogant.github.io/undouble/
116 |
117 | .. |stars| image:: https://img.shields.io/github/stars/erdogant/undouble
118 | :alt: Stars
119 | :target: https://img.shields.io/github/stars/erdogant/undouble
120 |
121 | .. |LOC| image:: https://sloc.xyz/github/erdogant/undouble/?category=code
122 | :alt: lines of code
123 | :target: https://github.com/erdogant/undouble
124 |
125 | .. |downloads_month| image:: https://static.pepy.tech/personalized-badge/undouble?period=month&units=international_system&left_color=grey&right_color=brightgreen&left_text=PyPI%20downloads/month
126 | :alt: Downloads per month
127 | :target: https://pepy.tech/project/undouble
128 |
129 | .. |downloads_total| image:: https://static.pepy.tech/personalized-badge/undouble?period=total&units=international_system&left_color=grey&right_color=brightgreen&left_text=Downloads
130 | :alt: Downloads in total
131 | :target: https://pepy.tech/project/undouble
132 |
133 | .. |license| image:: https://img.shields.io/badge/license-MIT-green.svg
134 | :alt: License
135 | :target: https://github.com/erdogant/undouble/blob/master/LICENSE
136 |
137 | .. |forks| image:: https://img.shields.io/github/forks/erdogant/undouble.svg
138 | :alt: Github Forks
139 | :target: https://github.com/erdogant/undouble/network
140 |
141 | .. |open issues| image:: https://img.shields.io/github/issues/erdogant/undouble.svg
142 | :alt: Open Issues
143 | :target: https://github.com/erdogant/undouble/issues
144 |
145 | .. |project status| image:: http://www.repostatus.org/badges/latest/active.svg
146 | :alt: Project Status
147 | :target: http://www.repostatus.org/#active
148 |
149 | .. |medium| image:: https://img.shields.io/badge/Medium-Blog-green.svg
150 | :alt: Medium Blog
151 | :target: https://erdogant.github.io/undouble/pages/html/Documentation.html#medium-blog
152 |
153 | .. |donate| image:: https://img.shields.io/badge/Support%20this%20project-grey.svg?logo=github%20sponsors
154 | :alt: donate
155 | :target: https://erdogant.github.io/undouble/pages/html/Documentation.html#
156 |
157 | .. |colab| image:: https://colab.research.google.com/assets/colab-badge.svg
158 | :alt: Colab example
159 | :target: https://erdogant.github.io/undouble/pages/html/Documentation.html#colab-notebook
160 |
161 | .. |repo-size| image:: https://img.shields.io/github/repo-size/erdogant/undouble
162 | :alt: repo-size
163 | :target: https://img.shields.io/github/repo-size/erdogant/undouble
164 |
165 | .. include:: add_bottom.add
166 |
--------------------------------------------------------------------------------
/docs/source/input_output.rst:
--------------------------------------------------------------------------------
1 | Input
2 | ************
3 |
4 | The input for the :func:`undouble.undouble.Undouble.import_data` can be the following three types:
5 |
6 | * Directory path
7 | * File locations
8 | * Numpy array containing images
9 |
10 | The scanned files can be filtered on extension type, and directories can be blacklisted. Note that these settings must be set during initialization. The black_list directory is set to 'undouble' by default to make sure that previously moved files are not included in the analysis again.
11 |
12 | The following parameters can be changed during initialization (see the sketch after this list):
13 |
14 | * Images are imported with the listed extensions (['png', 'tiff', 'jpg', 'jfif']).
15 | * Input images can be grayscaled during import.
16 | * Images can be resized to save memory, for example to (128, 128).
17 |
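A minimal sketch of such an initialization is shown below. The ``grayscale`` and ``dim`` parameters follow the examples elsewhere in this documentation; the ``ext`` and ``black_list`` parameter names are assumptions based on the description above.

.. code:: python

    # Import library
    from undouble import Undouble

    # Initialize with import and pre-processing settings
    model = Undouble(method='phash',
                     hash_size=8,
                     grayscale=True,                       # Grayscale images during import
                     dim=(128, 128),                       # Resize images to save memory
                     ext=['png', 'tiff', 'jpg', 'jfif'],   # Extensions to import (assumed parameter name)
                     black_list=['undouble'])              # Skip previously moved files (assumed parameter name)
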
18 |
19 |
20 | Directory
21 | ======================
22 |
23 | Images can be imported recursively from a target directory.
24 |
25 | .. code:: python
26 |
27 | # Import library
28 | from undouble import Undouble
29 |
30 | # Init with default settings
31 | model = Undouble()
32 |
33 | # Import data
34 | input_list_of_files = model.import_example(data='flowers')
35 | input_directory, _ = os.path.split(input_list_of_files[0])
36 |
37 | # The target directory looks as following:
38 | print(input_directory)
39 | # 'C:\\TEMP\\flower_images'
40 |
41 | # Import the files from disk, clean and pre-process them
42 | model.import_data(input_directory)
43 |
44 | # [clustimage] >INFO> Extracting images from: [C:\\TEMP\\flower_images]
45 | # [clustimage] >INFO> [214] files are collected recursively from path: [C:\\TEMP\\flower_images]
46 | # [clustimage] >INFO> [214] images are extracted.
47 | # [clustimage] >INFO> Reading and checking images.
48 | # [clustimage] >INFO> Reading and checking images.
49 | # [clustimage]: 100%|██████████| 214/214 [00:01<00:00, 133.25it/s]
50 |
51 | # Compute hash
52 | model.compute_hash()
53 |
54 | # Find images with image-hash <= threshold
55 | model.group(threshold=0)
56 |
57 | # Plot the images
58 | model.plot()
59 |
60 |
61 | File locations
62 | ======================
63 |
64 | Images can also be imported from a list of file path locations.
65 |
66 | .. code:: python
67 |
68 | # Import library
69 | from undouble import Undouble
70 |
71 | # Init with default settings
72 | model = Undouble()
73 |
74 | # Import data; Pathnames to the images.
75 | input_list_of_files = model.import_example(data='flowers')
76 |
77 | # [undouble] >INFO> Store examples at [..\undouble\data]..
78 | # [undouble] >INFO> Downloading [flowers] dataset from github source..
79 | # [undouble] >INFO> Extracting files..
80 | # [undouble] >INFO> [214] files are collected recursively from path: [..\undouble\undouble\data\flower_images]
81 |
82 | # The list of image path locations looks as follows but may differ on your machine.
83 | print(input_list_of_files)
84 |
85 | # ['\\repos\\undouble\\undouble\\data\\flower_images\\0001.png',
86 | # '\\repos\\undouble\\undouble\\data\\flower_images\\0002.png',
87 | # '\\repos\\undouble\\undouble\\data\\flower_images\\0003.png',
88 | # ...]
89 |
90 | model.import_data(input_list_of_files)
91 |
92 | # [clustimage] >INFO> Reading and checking images.
93 | # [clustimage] >INFO> Reading and checking images.
94 | # [clustimage]: 100%|██████████| 214/214 [00:02<00:00, 76.44it/s]
95 |
96 | # Compute hash
97 | model.compute_hash()
98 |
99 | # Find images with image-hash <= threshold
100 | model.group(threshold=0)
101 |
102 | # Plot the images
103 | model.plot()
104 |
105 |
106 |
107 | Numpy Array
108 | ======================
109 |
110 | Images can also be in the form of a numpy-array.
111 |
112 | .. code:: python
113 |
114 | # Import library
115 | from undouble import Undouble
116 |
117 | # Init with default settings
118 | model = Undouble()
119 |
120 | # Import data; numpy array containing images.
121 | X, y = model.import_example(data='mnist')
122 |
123 | print(X)
124 | # array([[ 0., 0., 5., ..., 0., 0., 0.],
125 | # [ 0., 0., 0., ..., 10., 0., 0.],
126 | # [ 0., 0., 0., ..., 16., 9., 0.],
127 | # ...,
128 | # [ 0., 0., 1., ..., 6., 0., 0.],
129 | # [ 0., 0., 2., ..., 12., 0., 0.],
130 | # [ 0., 0., 10., ..., 12., 1., 0.]])
131 | # Import the numpy array with images, then compute the hash
132 | model.import_data(X)
133 | model.compute_hash()
134 |
135 | # Find images with image-hash <= threshold
136 | model.group(threshold=0)
137 |
138 | # Plot the images
139 | model.plot()
140 |
141 |
142 |
143 | Output
144 | ************
145 |
146 | The output of the pipeline above is stored in ``model.results``, a dictionary with the following keys:
147 |
148 | .. code:: python
149 |
150 | # Import library
151 | from undouble import Undouble
152 |
153 | # Print all keys
154 | print(model.results.keys())
155 |
156 | # dict_keys(['img',
157 | # 'pathnames',
158 | # 'url',
159 | # 'filenames',
160 | # 'img_hash_bin',
161 | # 'img_hash_hex',
162 | # 'adjmat',
163 | # 'select_pathnames',
164 | # 'select_scores',
165 | # 'select_idx',
166 | # 'stats'])
167 |
168 | # Pathnames
169 | model.results['pathnames']
170 |
171 | # array(['D:\\REPOS\\undouble\\undouble\\data\\flower_images\\0001.png',
172 | # 'D:\\REPOS\\undouble\\undouble\\data\\flower_images\\0002.png',
173 | # 'D:\\REPOS\\undouble\\undouble\\data\\flower_images\\0003.png',...
174 |
175 | # Filenames
176 | model.results['filenames']
177 | # array(['0001.png', '0002.png', '0003.png',...
178 |
179 | # Adjacency matrix
180 | model.results['adjmat']
181 | # array([[ 0, 24, 24, ..., 30, 28, 26],
182 | # [24, 0, 28, ..., 28, 18, 36],
183 | # [24, 28, 0, ..., 28, 28, 28],
184 | # ...,
185 | # [30, 28, 28, ..., 0, 24, 34],
186 | # [28, 18, 28, ..., 24, 0, 34],
187 | # [26, 36, 28, ..., 34, 34, 0]])
188 |
189 | # Select groupings
190 | model.results['select_idx']
191 | # [array([81, 82], dtype=int64),
192 | # array([90, 91, 92], dtype=int64),
193 | # array([169, 170], dtype=int64)]
194 |
195 |
196 | Extract Groups
197 | ******************
198 |
199 | The groups can be extracted by combining the group indices (``select_idx``) with the pathnames (or filenames).
200 |
201 | .. code:: python
202 |
203 | # Import library
204 | from undouble import Undouble
205 |
206 | # Init with default settings
207 | model = Undouble()
208 |
209 | # Import data; Pathnames to the images.
210 | input_list_of_files = model.import_example(data='flowers')
211 |
212 | # Import data from files.
213 | model.import_data(input_list_of_files)
214 |
215 | # Compute hash
216 | model.compute_hash()
217 |
218 | # Find images with image-hash <= threshold
219 | model.group(threshold=0)
220 |
221 | # [undouble] >INFO> [3] groups with similar image-hash.
222 | # [undouble] >INFO> [3] groups are detected for [7] images.
223 |
224 | # Plot the images
225 | model.plot()
226 |
227 | # Extract the pathnames for each group
228 | for idx_group in model.results['select_idx']:
229 | print(idx_group)
230 | print(model.results['pathnames'][idx_group])
231 |
232 |
233 | # [81 82]
234 | # ['D:\\REPOS\\undouble\\undouble\\data\\flower_images\\0082 - Copy.png'
235 | # 'D:\\REPOS\\undouble\\undouble\\data\\flower_images\\0082.png']
236 | # [90 91 92]
237 | # ['D:\\REPOS\\undouble\\undouble\\data\\flower_images\\0090 - Copy (2).png'
238 | # 'D:\\REPOS\\undouble\\undouble\\data\\flower_images\\0090 - Copy.png'
239 | # 'D:\\REPOS\\undouble\\undouble\\data\\flower_images\\0090.png']
240 | # [169 170]
241 | # ['D:\\REPOS\\undouble\\undouble\\data\\flower_images\\0167 - Copy.png'
242 | # 'D:\\REPOS\\undouble\\undouble\\data\\flower_images\\0167.png']
243 |
244 |
245 |
246 | .. include:: add_bottom.add
--------------------------------------------------------------------------------
/docs/source/requirements.txt:
--------------------------------------------------------------------------------
1 | sphinx_rtd_theme
2 |
3 |
--------------------------------------------------------------------------------
/docs/source/sponsor.rst:
--------------------------------------------------------------------------------
1 | .. tip::
2 | You can support this project in various ways ❤️
3 |
4 | * Become a `Sponsor `_!
5 | * `Buy `_ me a coffee! I ❤️ coffee :)
6 | * `Donate `_ in Bitcoin.
7 | * Follow me on Medium! Go to my `medium profile `_ and press *follow*.
8 | * Subscribe to me on Substack! Go to my `profile `_ where you can *subscribe*.
9 | * **Star** this repo at the github page.
10 | * You can also buy a digital educational product over `here `_.
11 | * Other contributions can be in the form of feature requests, idea discussions, bug reports, or pull requests.
12 |
13 | Cheers Mate.
14 |
15 | .. raw:: html
16 |
17 |
18 |
--------------------------------------------------------------------------------
/docs/source/undouble.undouble.rst:
--------------------------------------------------------------------------------
1 | API References
2 | ------------------------------------------------
3 |
4 |
5 | .. automodule:: undouble.undouble
6 | :members:
7 | :undoc-members:
8 |
9 | .. include:: add_bottom.add
--------------------------------------------------------------------------------
/make_build.sh:
--------------------------------------------------------------------------------
1 | echo "Cleaning previous builds first.."
2 | rm -rf dist
3 | rm -rf build
4 | rm -rf undouble.egg-info
5 |
6 | echo "Making new wheel.."
7 | echo ""
8 | python setup.py bdist_wheel
9 | echo ""
10 |
11 | echo "Making source build .."
12 | echo ""
13 | python setup.py sdist
14 | echo ""
15 |
16 | read -p "Press [Enter] to install the pip package..."
17 | pip install -U dist/undouble-*-py3-none-any.whl
18 | echo ""
19 |
20 | read -p ">twine upload dist/* TO UPLOAD TO PYPI..."
21 | echo ""
22 |
23 | read -p "Press [Enter] key to close window..."
24 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "undouble"
7 | dynamic = ["version"]
8 | authors = [{ name = "Erdogan Taskesen", email = "erdogant@gmail.com" },]
9 | description = "Undouble is a Python package to detect (near-)identical images."
10 | readme = "README.md"
11 | requires-python = ">=3"
12 | license = { file = "LICENSE" }
13 | keywords = ["images", "duplicates", "identical"]
14 | classifiers = [
15 | "Programming Language :: Python :: 3",
16 | "License :: OSI Approved :: BSD License",
17 | "Operating System :: OS Independent",
18 | ]
19 | dependencies = [
20 | 'matplotlib',
21 | 'numpy',
22 | 'tqdm',
23 | 'clustimage>=1.6.6',
24 | 'ismember',
25 | 'datazets>=1.0.0',
26 | # 'opencv-python-headless',
27 | ]
28 |
29 | [project.urls]
30 | Homepage = "https://erdogant.github.io/undouble"
31 | Download = "https://github.com/erdogant/undouble/archive/{version}.tar.gz"
32 |
33 | [tool.setuptools]
34 | packages = ["undouble"]
35 | include-package-data = true
36 |
37 | [tool.setuptools.dynamic]
38 | version = { attr = "undouble.__version__" }
39 |
--------------------------------------------------------------------------------
/requirements-dev.txt:
--------------------------------------------------------------------------------
1 | sphinx_rtd_theme
2 | rst2pdf
3 | spyder-kernels==2.3.*
4 | sphinx
5 | pytest
6 | git+https://github.com/SuperKogito/sphinxcontrib-pdfembed.git
7 | sphinxcontrib-fulltoc
8 | nbconvert # jupyter nbconvert --to html notebook.ipynb
9 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | numpy
3 | tqdm
4 | clustimage>=1.6.6
5 | ismember
6 | datazets>=1.0.0
7 |
--------------------------------------------------------------------------------
/requirements_additional.txt:
--------------------------------------------------------------------------------
1 | piexif
2 | geopy
3 | folium
4 | opencv-python-headless
5 | # opencv-python
6 |
7 |
--------------------------------------------------------------------------------
/setup.py.depricated:
--------------------------------------------------------------------------------
1 | import setuptools
2 | import re
3 |
4 | # versioning ------------
5 | VERSIONFILE="undouble/__init__.py"
6 | getversion = re.search( r"^__version__ = ['\"]([^'\"]*)['\"]", open(VERSIONFILE, "rt").read(), re.M)
7 | if getversion:
8 | new_version = getversion.group(1)
9 | else:
10 | raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE,))
11 |
12 | # Setup ------------
13 | with open("README.md", "r", encoding="utf8") as fh:
14 | long_description = fh.read()
15 | setuptools.setup(
16 | install_requires=['matplotlib',
17 | 'numpy',
18 | 'tqdm',
19 | 'clustimage>=1.6.6',
20 | 'ismember',
21 | 'datazets>=1.0.0',
22 | #'opencv-python-headless',
23 | ],
24 | python_requires='>=3',
25 | name='undouble',
26 | version=new_version,
27 | author="Erdogan Taskesen",
28 | author_email="erdogant@gmail.com",
29 | description="Python package undouble",
30 | long_description=long_description,
31 | long_description_content_type="text/markdown",
32 | url="https://erdogant.github.io/undouble",
33 | download_url = 'https://github.com/erdogant/undouble/archive/'+new_version+'.tar.gz',
34 | packages=setuptools.find_packages(), # Searches throughout all dirs for files to include
35 | include_package_data=True, # Must be true to include files depicted in MANIFEST.in
36 | license_files=["LICENSE"],
37 | classifiers=[
38 | "Programming Language :: Python :: 3",
39 | "License :: OSI Approved :: BSD License",
40 | "Operating System :: OS Independent",
41 | ],
42 | )
43 |
--------------------------------------------------------------------------------
/undouble/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | from datazets import get as import_example
4 |
5 | from undouble.undouble import Undouble
6 |
7 | from undouble.undouble import (
8 | compute_blur,
9 | seperate_path,
10 | sort_images,
11 | )
12 |
13 | __author__ = 'Erdogan Taskesen'
14 | __email__ = 'erdogant@gmail.com'
15 | __version__ = '1.4.10'
16 |
17 | # Setup root logger
18 | _logger = logging.getLogger('undouble')
19 | _log_handler = logging.StreamHandler()
20 | _fmt = '[{asctime}] [{name}] [{levelname}] {msg}'
21 | _formatter = logging.Formatter(fmt=_fmt, style='{', datefmt='%d-%m-%Y %H:%M:%S')
22 | _log_handler.setFormatter(_formatter)
23 | _log_handler.setLevel(logging.DEBUG)
24 | _logger.addHandler(_log_handler)
25 | _logger.propagate = False
26 |
27 | # module level doc-string
28 | __doc__ = """
29 | undouble
30 | =====================================================================
31 |
32 | undouble is a Python package to detect (near-)identical images.
33 |
34 | The aim of ``undouble`` is to detect (near-)identical images. It works using a multi-step process of pre-processing the
35 | images (grayscaling, normalizing, and scaling), computing the image-hash, and grouping the images based on a threshold value.
36 | A threshold of 0 will group images with an identical image-hash.
37 | The groups can be visualized with the plot() functionality and easily moved with the move() functionality. When
38 | moving images, the image in the group with the largest resolution will be copied, and all other images are moved to
39 | the "undouble" subdirectory.
40 |
41 | The following steps are taken:
42 | 1. Read recursively all images from directory with the specified extensions.
43 | 2. Compute image hash.
44 | 3. Group similar images.
45 | 4. Move if desired.
46 |
47 | Example
48 | -------
49 | >>> # Import library
50 | >>> from undouble import Undouble
51 | >>>
52 | >>> # Init with default settings
53 | >>> model = Undouble(method='phash', hash_size=8)
54 | >>>
55 | >>> # Import example data
56 | >>> targetdir = model.import_example(data='flowers')
57 | >>>
58 | >>> # Import the files from disk, clean and pre-process them
59 | >>> model.import_data(targetdir)
60 | >>>
61 | >>> # Compute image-hash
62 | >>> model.compute_hash()
63 | >>>
64 | >>> # Group images with image-hash <= threshold
65 | >>> model.group(threshold=0)
66 | >>>
67 | >>> # Plot the images
68 | >>> model.plot()
69 | >>>
70 | >>> # Move the images
71 | >>> model.move_to_dir(gui=True)
72 |
73 | References
74 | ----------
75 | * Blog: https://towardsdatascience.com/detection-of-duplicate-images-using-image-hash-functions-4d9c53f04a75
76 | * Github: https://github.com/erdogant/undouble
77 | * Documentation: https://erdogant.github.io/undouble/
78 |
79 | """
80 |
--------------------------------------------------------------------------------
/undouble/tests/test_undouble.py:
--------------------------------------------------------------------------------
1 | from undouble import Undouble
2 | import itertools as it
3 | import numpy as np
4 | import unittest
5 | from tqdm import tqdm
6 |
7 | class TestUNDOUBLE(unittest.TestCase):
8 |
9 | def test_import_data(self):
10 | model = Undouble()
11 | # Import flowers example
12 | X = model.import_example(data='flowers')
13 |
14 | # Check numpy array imports
15 | model.import_data(X)
16 | # assert model.results['img'].shape==(214, 128, 128, 4)
17 | assert len(model.results['pathnames'])==214
18 | assert len(model.results['filenames'])==214
19 | assert set(model.results.keys())==set(['img', 'feat', 'pathnames', 'filenames', 'url'])
20 |
21 | def test_compute_imghash(self):
22 | model = Undouble()
23 | # Import flowers example
24 | X = model.import_example(data='flowers')
25 | imgs = model.import_data(X, return_results=True)
26 |
27 | hash_sizes=[4,8,16]
28 | for hash_size in hash_sizes:
29 | hashs = model.compute_imghash(imgs['img'][0], to_array=True, hash_size=hash_size)
30 | assert len(hashs[0])==(hash_size*hash_size)
31 |
32 | hashs = model.compute_imghash(imgs['img'][0:5], to_array=True, hash_size=8)
33 | assert len(hashs)==5
34 | assert hashs[0].shape==(64,)
35 | hashs = model.compute_imghash(imgs['img'][0:5], to_array=False, hash_size=8)
36 | assert len(hashs)==5
37 | assert hashs[0].shape==(8,8)
38 |
39 | hashs = model.compute_imghash(imgs['img'][0], to_array=True, hash_size=8)
40 | assert len(hashs)==1
41 | assert hashs[0].shape==(64,)
42 | hashs = model.compute_imghash(imgs['img'][0], to_array=False, hash_size=8)
43 | assert len(hashs)==1
44 | assert hashs[0].shape==(8,8)
45 |
46 | def test_compute_hash(self):
47 | model = Undouble(method='phash')
48 | # Import flowers example
49 | X = model.import_example(data='flowers')
50 | # Import data
51 | model.import_data(X, return_results=False)
52 | # Compute Hash
53 | model.compute_hash()
54 | assert set(model.results.keys())==set(['img', 'url', 'pathnames', 'filenames', 'img_hash_bin', 'img_hash_hex', 'adjmat'])
55 |
56 | param_grid = {
57 | 'method': ['ahash','phash','dhash','whash-haar','crop-resistant-hash'],
58 | 'grayscale':[True, False],
59 | 'hash_size' : [4, 8, 16],
60 | 'dim' : [(64,64), (128,128), (256,256)]
61 | }
62 |
63 | allNames = param_grid.keys()
64 | combinations = it.product(*(param_grid[Name] for Name in allNames))
65 | combinations=list(combinations)
66 |
67 | for combination in tqdm(combinations):
68 | print(combination)
69 | model = Undouble(method=combination[0], grayscale=combination[1], hash_size=combination[2], dim=combination[3], verbose=40)
70 | # Import data
71 | model.import_data(X, return_results=False)
72 | # Compute Hash
73 | assert model.compute_hash(return_dict=True)
74 |
75 |
--------------------------------------------------------------------------------