├── src └── main │ ├── resources │ └── META-INF │ │ └── MANIFEST.MF │ └── java │ ├── CsvCharacterExtractor.java │ ├── CombinedOutputData.java │ ├── ConfigData.java │ ├── ConfigReader.java │ └── CsvUniqueCharacterProcessor.java ├── .idea ├── vcs.xml ├── copyright │ ├── profiles_settings.xml │ └── Supyrb__Johannes.xml ├── modules.xml ├── misc.xml └── compiler.xml ├── in └── example.csv ├── config.xml ├── pom.xml ├── LICENSE ├── CsvCharacterExtractor.iml ├── .gitignore └── README.md /src/main/resources/META-INF/MANIFEST.MF: -------------------------------------------------------------------------------- 1 | Manifest-Version: 1.0 2 | Main-Class: CsvCharacterExtractor 3 | 4 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/copyright/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /in/example.csv: -------------------------------------------------------------------------------- 1 | ID,Description,en,de,jp,ko,zh-CN,ru 2 | General.Cancel,"Cancel phrase - No more than 15 characters, for buttons and such",Cancel,Abbrechen,キャンセル,취소,取消,Отмена 3 | General.Confirm,"Confirm phrase - No more than 15 characters, for buttons and 
such",Confirm,Bestätigen,決定,확인,确认,Принять 4 | General.Continue,"Continue Phrase - No more than 15 characters, for buttons and such",Continue,Fortsetzen,続ける,계속,继续,Продолжить игру 5 | Example.Placerholder, "This show how placeholders are used",Continue {0},Fortsetzen {0},続ける {0},계속 {0},继续 {0}。,Продолжить игру {0} -------------------------------------------------------------------------------- /config.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | in/example.csv 5 | 6 | 7 | out/ 8 | 9 | 10 | true 11 | 0123456789+-,.!?- 12 | {} 13 | 14 | 15 | default 16 | en 17 | de 18 | ru 19 | 20 | -------------------------------------------------------------------------------- /.idea/copyright/Supyrb__Johannes.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/compiler.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | CharacterProcessor 8 | CsvCharacterExtractor 9 | 0.1-SNAPSHOT 10 | 11 | 12 | 13 | 14 | com.univocity 15 | univocity-parsers 16 | 2.5.9 17 | jar 18 | 19 | 20 | 21 | com.vdurmont 22 | emoji-java 23 | 4.0.0 24 | 25 | 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Johannes Deml 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without 
limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/main/java/CsvCharacterExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * -------------------------------------------------------------------------------------------------------------------- 3 | * 4 | * Copyright (c) 2018 Supyrb. All rights reserved. 
5 | * 6 | * 7 | * Johannes Deml 8 | * send@johannesdeml.com 9 | * 10 | * -------------------------------------------------------------------------------------------------------------------- 11 | */ 12 | 13 | import java.util.ArrayList; 14 | 15 | public class CsvCharacterExtractor { 16 | public static void main(String[] args) { 17 | System.out.println("Running CsvCharacterExtractor"); 18 | 19 | ConfigData config = new ConfigData("./in/table.csv", "./out/", false,"", "", new ArrayList()); 20 | ConfigReader configReader = new ConfigReader(); 21 | configReader.parseXmlFile("./config.xml", config); 22 | CsvUniqueCharacterProcessor processor = new CsvUniqueCharacterProcessor(); 23 | processor.initialize(config); 24 | processor.run(); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /CsvCharacterExtractor.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm 2 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 3 | 4 | # User-specific stuff: 5 | .idea/**/workspace.xml 6 | .idea/**/tasks.xml 7 | .idea/dictionaries 8 | 9 | # Sensitive or high-churn files: 10 | .idea/**/dataSources/ 11 | .idea/**/dataSources.ids 12 | .idea/**/dataSources.xml 13 | .idea/**/dataSources.local.xml 14 | .idea/**/sqlDataSources.xml 15 | .idea/**/dynamic.xml 16 | .idea/**/uiDesigner.xml 17 | 18 | # Gradle: 19 | .idea/**/gradle.xml 20 | .idea/**/libraries 21 | 22 | # CMake 23 | cmake-build-debug/ 24 | cmake-build-release/ 25 | 26 | # Mongo Explorer plugin: 27 | .idea/**/mongoSettings.xml 28 | 29 | ## File-based project format: 
import java.util.ArrayList;
import java.util.List;

/**
 * Describes one combined output file: the name of the file and the names of the columns whose
 * characters are merged into it.
 */
public class CombinedOutputData {
    private String name;
    private List<String> columns;

    /**
     * @param name    name of the combined output
     * @param columns names of the columns to combine; copied, {@code null} is treated as empty
     */
    public CombinedOutputData(String name, List<String> columns) {
        this.name = name;
        this.columns = mutableCopy(columns);
    }

    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    /** Returns the column names that make up this combined output. */
    public List<String> getColumns() {
        return columns;
    }

    /**
     * Replaces the column names.
     *
     * @param columns new column names; copied, {@code null} is treated as empty
     */
    public void setColumns(List<String> columns) {
        this.columns = mutableCopy(columns);
    }

    /** Appends one column name to this combined output. */
    public void addColumn(String columnName) {
        columns.add(columnName);
    }

    /**
     * Copies the given list into a fresh {@link ArrayList} so that {@link #addColumn(String)}
     * works even when the caller passed {@code null} or a fixed-size/immutable list.
     */
    private static List<String> mutableCopy(List<String> source) {
        return source == null ? new ArrayList<String>() : new ArrayList<String>(source);
    }
}
functionality](https://i.imgur.com/ionpTPp.gif) 4 | 5 | ## Functionality 6 | Extract all unique characters of each column of a csv file, combine and manipulate results and store the results in text files for further usage. 7 | This tool was developed to create font assets for TextMeshPro in Unity. Creating textures with just the characters you need is essential for languages like Chinese, Japanese or Korean. 8 | 9 | ### Input 10 | * Input file can be defined in config.xml, default value is "in/example.csv" 11 | * Languages are defined in columns, the first row defines the language name (see [example.csv](in/example.csv)) 12 | * Columns `ID` and `Description` will be ignored 13 | * Newline character (\n\r) and all emojis will be ignored 14 | 15 | ### Output 16 | * Text files are created for each language and named "ColumnName.txt". Output path can be defined in config.xml 17 | * One file per column, expecting to have one language per column 18 | 19 | ## Requirements 20 | * [Java Runtime Environment](http://www.oracle.com/technetwork/java/javase/downloads/jre8-downloads-2133155.html) 21 | 22 | ## Download 23 | * [latest release](https://github.com/JohannesDeml/CsvCharacterExtractor/releases) 24 | 25 | ## Run 26 | * Windows: Double-click Run.bat 27 | * Windows, Mac, Linux: Run `java -jar CsvCharacterExtractor.jar` in the terminal 28 | ### Config usage 29 | * With the config you can set the in and out path as well as characters that should be always or never included. Take a look at the [example config](config.xml) 30 | * Paths can be relative, e.g. `in/example.csv` 31 | * Paths can be absolute, e.g. 
`C:/Users/UserName/Documents/LanguageCharacterFiles/` 32 | * Use forward slashes only `/` 33 | * Automatically add lower and upper case characters to the unique characters file 34 | * Create union files of multiple separate columns 35 | ## Example 36 | [Example Spreadsheet](https://docs.google.com/spreadsheets/d/1WmGauAzcCyQu7OcOnFP2Ypx2x9xuJCclpf7p25cFpz0/edit#gid=1088591893) 37 | 38 | ## Roadmap 39 | * Document code 40 | * Add information on how to build the project 41 | 42 | ## Third Party Libraries 43 | * https://github.com/uniVocity/univocity-parsers (Apache 2.0 License) 44 | * https://github.com/vdurmont/emoji-java (MIT License) 45 | -------------------------------------------------------------------------------- /src/main/java/ConfigData.java: -------------------------------------------------------------------------------- 1 | /* 2 | * -------------------------------------------------------------------------------------------------------------------- 3 | * 4 | * Copyright (c) 2018 Supyrb. All rights reserved. 
5 | * 6 | * 7 | * Johannes Deml 8 | * send@johannesdeml.com 9 | * 10 | * -------------------------------------------------------------------------------------------------------------------- 11 | */ 12 | 13 | import java.util.List; 14 | 15 | public class ConfigData { 16 | private String inPath; 17 | private String outPath; 18 | private boolean includeUpperAndLowerCase; 19 | private String includeCharacters; 20 | private String excludeCharacters; 21 | private List combinedOutputTargets; 22 | 23 | public ConfigData(String inPath, String outPath, boolean includeUpperAndLowerCase, String includeCharacters, 24 | String excludeCharacters, List combinedOutputTargets) { 25 | this.inPath = inPath; 26 | this.outPath = outPath; 27 | this.includeUpperAndLowerCase = includeUpperAndLowerCase; 28 | this.includeCharacters = includeCharacters; 29 | this.excludeCharacters = excludeCharacters; 30 | this.combinedOutputTargets = combinedOutputTargets; 31 | } 32 | 33 | public String getInPath() { 34 | return inPath; 35 | } 36 | 37 | public void setInPath(String inPath) { 38 | this.inPath = inPath; 39 | } 40 | 41 | public String getOutPath() { 42 | return outPath; 43 | } 44 | 45 | public void setOutPath(String outPath) { 46 | this.outPath = outPath; 47 | } 48 | 49 | public boolean isIncludeUpperAndLowerCase() { 50 | return includeUpperAndLowerCase; 51 | } 52 | 53 | public void setIncludeUpperAndLowerCase(boolean includeUpperAndLowerCase) { 54 | this.includeUpperAndLowerCase = includeUpperAndLowerCase; 55 | } 56 | 57 | public String getIncludeCharacters() { 58 | return includeCharacters; 59 | } 60 | 61 | public void setIncludeCharacters(String includeCharacters) { 62 | this.includeCharacters = includeCharacters; 63 | } 64 | 65 | public String getExcludeCharacters() { 66 | return excludeCharacters; 67 | } 68 | 69 | public void setExcludeCharacters(String excludeCharacters) { 70 | this.excludeCharacters = excludeCharacters; 71 | } 72 | 73 | public List getCombinedOutputTargets() { 74 | 
return combinedOutputTargets; 75 | } 76 | 77 | public void setCombinedOutputTargets(List combinedOutputTargets) { 78 | this.combinedOutputTargets = combinedOutputTargets; 79 | } 80 | 81 | public void addCombinedOutputTarget(CombinedOutputData target) { 82 | this.combinedOutputTargets.add(target); 83 | } 84 | } 85 | 86 | -------------------------------------------------------------------------------- /src/main/java/ConfigReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * -------------------------------------------------------------------------------------------------------------------- 3 | * 4 | * Copyright (c) 2018 Supyrb. All rights reserved. 5 | * 6 | * 7 | * Johannes Deml 8 | * send@johannesdeml.com 9 | * 10 | * -------------------------------------------------------------------------------------------------------------------- 11 | */ 12 | 13 | import org.w3c.dom.Document; 14 | import org.w3c.dom.Element; 15 | import org.w3c.dom.NodeList; 16 | import org.xml.sax.SAXException; 17 | 18 | import javax.xml.parsers.DocumentBuilder; 19 | import javax.xml.parsers.DocumentBuilderFactory; 20 | import javax.xml.parsers.ParserConfigurationException; 21 | import java.io.IOException; 22 | import java.util.ArrayList; 23 | import java.util.List; 24 | 25 | public class ConfigReader { 26 | 27 | public ConfigReader() { 28 | } 29 | 30 | public void parseXmlFile(String relativeConfigPath, ConfigData configData) { 31 | //get the factory 32 | DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance(); 33 | Document dom = null; 34 | 35 | try { 36 | 37 | //Using factory get an instance of document builder 38 | DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder(); 39 | 40 | //parse using builder to get DOM representation of the XML file 41 | dom = documentBuilder.parse(relativeConfigPath); 42 | 43 | 44 | }catch(ParserConfigurationException pce) { 45 | pce.printStackTrace(); 46 | 
}catch(SAXException se) { 47 | se.printStackTrace(); 48 | }catch(IOException ioe) { 49 | ioe.printStackTrace(); 50 | } 51 | 52 | if(dom == null) { 53 | System.out.println("Couldn't read config file, proceeding with default values"); 54 | return; 55 | } 56 | 57 | Element root = dom.getDocumentElement(); 58 | 59 | NodeList inNodes = root.getElementsByTagName("in"); 60 | if(inNodes == null || inNodes.getLength() <= 0) { 61 | System.out.println("Couldn't find in node in config file, proceeding with default values"); 62 | } 63 | else { 64 | Element node = (Element)inNodes.item(0); 65 | configData.setInPath(getTextValue(node, "path", configData.getInPath())); 66 | } 67 | 68 | 69 | NodeList outNodes = root.getElementsByTagName("out"); 70 | if(outNodes == null || outNodes.getLength() <= 0) { 71 | System.out.println("Couldn't find out node in config file, proceeding with default values"); 72 | } 73 | else { 74 | Element node = (Element)outNodes.item(0); 75 | configData.setOutPath(getTextValue(node, "path", configData.getOutPath())); 76 | } 77 | 78 | NodeList modificationNodes = root.getElementsByTagName("modification"); 79 | if(modificationNodes == null || modificationNodes.getLength() <= 0) { 80 | System.out.println("Couldn't find modification node in config file, proceeding with default values"); 81 | } 82 | else { 83 | Element node = (Element)modificationNodes.item(0); 84 | String upperLowerCaseString = getTextValue(node, "include-upper-and-lower-case", configData.isIncludeUpperAndLowerCase()?"true":"false"); 85 | configData.setIncludeUpperAndLowerCase(upperLowerCaseString.equals("true")); 86 | configData.setIncludeCharacters(getTextValue(node, "include-characters", configData.getIncludeCharacters())); 87 | configData.setExcludeCharacters(getTextValue(node, "exclude-characters", configData.getExcludeCharacters())); 88 | } 89 | 90 | NodeList combineNodes = root.getElementsByTagName("combine"); 91 | if(combineNodes == null || combineNodes.getLength() <= 0) { 92 | 
System.out.println("Couldn't find combine node in config file, proceeding with default values"); 93 | } 94 | else { 95 | for (int i = 0; i < combineNodes.getLength(); i++) { 96 | Element node = (Element)combineNodes.item(i); 97 | String name = getTextValue(node, "name", String.format("combinedValues%d", i)); 98 | List columns = getTextValues(node, "column"); 99 | configData.addCombinedOutputTarget(new CombinedOutputData(name, columns)); 100 | } 101 | } 102 | } 103 | 104 | private String getTextValue(Element element, String tagName, String defaultValue) { 105 | if(element == null) { 106 | return defaultValue; 107 | } 108 | NodeList nodes = element.getElementsByTagName(tagName); 109 | if(nodes == null || nodes.getLength() == 0) { 110 | return defaultValue; 111 | } 112 | return nodes.item(0).getTextContent(); 113 | } 114 | 115 | private List getTextValues(Element element, String tagName) { 116 | List list = new ArrayList(); 117 | NodeList nodes = element.getElementsByTagName(tagName); 118 | if(nodes == null || nodes.getLength() == 0) { 119 | return list; 120 | } 121 | 122 | for (int i = 0; i < nodes.getLength(); i++) { 123 | list.add(nodes.item(i).getTextContent()); 124 | } 125 | 126 | return list; 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /src/main/java/CsvUniqueCharacterProcessor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * -------------------------------------------------------------------------------------------------------------------- 3 | * 4 | * Copyright (c) 2018 Supyrb. All rights reserved. 
5 | * 6 | * 7 | * Johannes Deml 8 | * send@johannesdeml.com 9 | * 10 | * -------------------------------------------------------------------------------------------------------------------- 11 | */ 12 | 13 | import com.univocity.parsers.common.processor.ColumnProcessor; 14 | import com.univocity.parsers.csv.CsvParser; 15 | import com.univocity.parsers.csv.CsvParserSettings; 16 | import com.vdurmont.emoji.EmojiParser; 17 | 18 | import java.io.*; 19 | import java.util.HashMap; 20 | import java.util.HashSet; 21 | import java.util.List; 22 | import java.util.Map; 23 | 24 | public class CsvUniqueCharacterProcessor { 25 | 26 | private CsvParserSettings parserSettings; 27 | private ColumnProcessor rowProcessor; 28 | private CsvParser parser; 29 | private Map> uniqueLanguageCharacters; 30 | private HashSet allUniqueCharacters; 31 | private ConfigData config; 32 | 33 | public CsvUniqueCharacterProcessor() { 34 | parserSettings = defineParserSettings(); 35 | rowProcessor = defineProcessorSettings(); 36 | parser = new CsvParser(parserSettings); 37 | uniqueLanguageCharacters = new HashMap>(); 38 | allUniqueCharacters = new HashSet(); 39 | } 40 | 41 | public void initialize(ConfigData config) { 42 | this.config = config; 43 | uniqueLanguageCharacters.clear(); 44 | allUniqueCharacters.clear(); 45 | } 46 | 47 | public void run() { 48 | String pathToInputCsv = config.getInPath(); 49 | // Check for input file 50 | if(!fileExists(pathToInputCsv)) { 51 | System.out.println("Missing input file! 
The input path set in the config is " + pathToInputCsv); 52 | return; 53 | } 54 | 55 | parseInputFile(pathToInputCsv); 56 | extractUniqueCharacters(); 57 | 58 | storeColumnCharacters(); 59 | storeAllCharacters(); 60 | storeCombinationCharacters(); 61 | } 62 | 63 | private void extractUniqueCharacters() { 64 | Map> columnValues = rowProcessor.getColumnValuesAsMapOfNames(); 65 | 66 | for (Map.Entry> columnEntry : columnValues.entrySet()) { 67 | String columnName = columnEntry.getKey(); 68 | String fileName = generateValidFileName(columnName); 69 | if(ignoreColumn(fileName)) { 70 | continue; 71 | } 72 | List entries = columnEntry.getValue(); 73 | System.out.println("Processing column " + columnName); 74 | HashSet uniqueCharacters = getUniqueCharacters(entries); 75 | allUniqueCharacters.addAll(uniqueCharacters); 76 | addIncludeCharacters(uniqueCharacters); 77 | removeExcludeCharacters(uniqueCharacters); 78 | uniqueLanguageCharacters.put(fileName, uniqueCharacters); 79 | } 80 | addIncludeCharacters(allUniqueCharacters); 81 | removeExcludeCharacters(allUniqueCharacters); 82 | } 83 | 84 | private void storeColumnCharacters() { 85 | for (Map.Entry>languageCharacters: uniqueLanguageCharacters.entrySet()) { 86 | String fileName = languageCharacters.getKey(); 87 | HashSet uniqueCharacters = languageCharacters.getValue(); 88 | storeAsText(fileName, uniqueCharacters); 89 | } 90 | } 91 | 92 | private void storeAllCharacters() { 93 | storeAsText("AllUniqueCharacters", allUniqueCharacters); 94 | } 95 | 96 | private void storeCombinationCharacters() { 97 | List combinedOutputTargets = config.getCombinedOutputTargets(); 98 | for (CombinedOutputData combinedOutputData : combinedOutputTargets) { 99 | HashSet uniqueCharacters = new HashSet(); 100 | String combinedDataName = combinedOutputData.getName(); 101 | System.out.println("Processing combined data " + combinedDataName); 102 | 103 | List targetColumns = combinedOutputData.getColumns(); 104 | for (String column : targetColumns) { 
105 | if(!uniqueLanguageCharacters.containsKey(column)) { 106 | System.out.println("Missing column " + column + " for combinedColumn " + combinedDataName + " - ignoring column."); 107 | continue; 108 | } 109 | HashSet languageCharacters = uniqueLanguageCharacters.get(column); 110 | uniqueCharacters.addAll(languageCharacters); 111 | } 112 | storeAsText(combinedDataName, uniqueCharacters); 113 | } 114 | } 115 | 116 | private void storeAsText(String fileName, HashSet uniqueCharacters) { 117 | String uniqueCharacterString = getStringRepresentation(uniqueCharacters); 118 | System.out.println(fileName + " has " + uniqueCharacterString.length() + " unique characters"); 119 | writeToFile(config.getOutPath() + fileName + ".txt", uniqueCharacterString); 120 | } 121 | 122 | private String generateValidFileName(String name) { 123 | return name.replaceAll("[\\\\/:*?\"<>|]", "_"); 124 | } 125 | 126 | private boolean ignoreColumn(String columnName) { 127 | return columnName.toLowerCase().equals("id") || columnName.toLowerCase().equals("description"); 128 | } 129 | 130 | private void parseInputFile(String pathToInputCsv) { 131 | parser.parse(getReader(pathToInputCsv)); 132 | } 133 | 134 | private CsvParserSettings defineParserSettings() { 135 | CsvParserSettings parserSettings = new CsvParserSettings(); 136 | parserSettings.getFormat().setLineSeparator("\n"); 137 | parserSettings.setHeaderExtractionEnabled(true); 138 | return parserSettings; 139 | } 140 | 141 | private ColumnProcessor defineProcessorSettings() { 142 | ColumnProcessor rowProcessor = new ColumnProcessor(); 143 | parserSettings.setProcessor(rowProcessor); 144 | return rowProcessor; 145 | } 146 | 147 | private boolean fileExists(String path) { 148 | File file = new File(path); 149 | if(!file.exists()) { 150 | return false; 151 | } 152 | return true; 153 | } 154 | 155 | private void writeToFile(String relativePath, String uniqueCharacterString) { 156 | PrintWriter out = null; 157 | try { 158 | 159 | File file = new 
File(relativePath); 160 | file.getParentFile().mkdirs(); 161 | if(!file.exists()) { 162 | file.createNewFile(); 163 | } 164 | 165 | out = new PrintWriter(file, "UTF-8"); 166 | out.write(uniqueCharacterString); 167 | } 168 | catch (IOException e) 169 | { 170 | e.printStackTrace(); 171 | } 172 | finally 173 | { 174 | if(out != null) { 175 | out.close(); 176 | } 177 | } 178 | } 179 | 180 | private String getStringRepresentation(HashSet uniqueCharacters) { 181 | StringBuilder builder = new StringBuilder(uniqueCharacters.size()); 182 | for (Character character: uniqueCharacters) { 183 | builder.append(character); 184 | } 185 | return builder.toString(); 186 | } 187 | 188 | private HashSet getUniqueCharacters(List entries) { 189 | boolean includeUpperAndLowercaseVersion = config.isIncludeUpperAndLowerCase(); 190 | HashSet uniqueCharacters = new HashSet(); 191 | for (String entry : entries) { 192 | if(entry == null) { 193 | continue; 194 | } 195 | // Remove emojis 196 | entry = EmojiParser.removeAllEmojis(entry); 197 | // Remove newlines 198 | entry = entry.replaceAll("\n\r", ""); 199 | entry = entry.replaceAll("\n", ""); 200 | char[] characters = entry.toCharArray(); 201 | for (int i = 0; i uniqueCharacters) { 219 | String excludeCharacters= config.getExcludeCharacters(); 220 | 221 | for (int i = 0; i< excludeCharacters.length(); i++) { 222 | char c = excludeCharacters.charAt(i); 223 | uniqueCharacters.remove(c); 224 | } 225 | } 226 | 227 | private void addIncludeCharacters(HashSet uniqueCharacters) { 228 | String includeCharacters = config.getIncludeCharacters(); 229 | 230 | for (int i = 0; i< includeCharacters.length(); i++) { 231 | char c = includeCharacters.charAt(i); 232 | uniqueCharacters.add(c); 233 | } 234 | } 235 | 236 | public Reader getReader(String relativePath) { 237 | try { 238 | InputStream inputStream = new FileInputStream(relativePath); 239 | return new InputStreamReader(inputStream, "UTF-8"); 240 | } catch (Exception e) { 241 | e.printStackTrace(); 242 
| return null; 243 | } 244 | } 245 | } 246 | --------------------------------------------------------------------------------