├── .gitignore ├── README.md ├── SPMF-LICENSE.txt ├── example_input.txt ├── example_output.txt ├── input.txt ├── output.txt ├── spmf.jar └── vmsp.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Maximal Sequential Patterns Mining 2 | 3 | A python wrapper for the VMSP algorithm for mining maximal 
sequential patterns provided by SPMF 4 | 5 | Detailed documentation: http://www.philippe-fournier-viger.com/spmf/index.php?link=documentation.php#vmsp 6 | -------------------------------------------------------------------------------- /SPMF-LICENSE.txt: -------------------------------------------------------------------------------- 1 | License Agreement 2 | 3 | SPMF is copyright by Philippe Fournier-Viger and some parts are copyright by contributors. 4 | 5 | The code is licensed under the open-source GNU GPL version 3 license. The GPL license provides four freedoms: 6 | 7 | Obtain and run the program for any purpose 8 | Get a copy of the source code 9 | Modify the source code 10 | Re-distribute the modified source code 11 | The only restriction is that if you want to redistribute the source code, you must: 12 | 13 | provide access to the source code, 14 | license derived work under the same GPL v3 license 15 | Besides, if SPMF is used in a scientific publication, please cite the following publication: 16 | 17 | Fournier-Viger, P., Lin, C.W., Gomariz, A., Gueniche, T., Soltani, A., Deng, Z., Lam, H. T. (2016). The SPMF Open-Source Data Mining Library Version 2. Proc. 19th European Conference on Principles of Data Mining and Knowledge Discovery (PKDD 2016) Part III, Springer LNCS 9853, pp. 36-40. 18 | 19 | If you have any questions, please contact Philippe Fournier-Viger. 
-------------------------------------------------------------------------------- /example_input.txt: -------------------------------------------------------------------------------- 1 | 1 -1 1 2 3 -1 1 3 -1 4 -1 3 6 -1 -2 2 | 1 4 -1 3 -1 2 3 -1 1 5 -1 -2 3 | 5 6 -1 1 2 -1 4 6 -1 3 -1 2 -1 -2 4 | 5 -1 7 -1 1 6 -1 3 -1 2 -1 3 -1 -2 -------------------------------------------------------------------------------- /example_output.txt: -------------------------------------------------------------------------------- 1 | 6 -1 2 -1 3 -1 #SUP: 2 2 | 5 -1 2 -1 3 -1 #SUP: 2 3 | 4 -1 3 -1 2 -1 #SUP: 2 4 | 1 2 -1 6 -1 #SUP: 2 5 | 1 -1 3 -1 3 -1 #SUP: 3 6 | 1 -1 2 -1 3 -1 #SUP: 2 7 | 5 -1 6 -1 3 -1 2 -1 #SUP: 2 8 | 5 -1 1 -1 3 -1 2 -1 #SUP: 2 9 | 1 2 -1 4 -1 3 -1 #SUP: 2 10 | 1 -1 2 3 -1 1 -1 #SUP: 2 11 | -------------------------------------------------------------------------------- /input.txt: -------------------------------------------------------------------------------- 1 | 1 -1 1 2 3 -1 1 3 -1 4 -1 3 6 -1 -2 2 | 1 4 -1 3 -1 2 3 -1 1 5 -1 -2 3 | 5 6 -1 1 2 -1 4 6 -1 3 -1 2 -1 -2 4 | 5 -1 7 -1 1 6 -1 3 -1 2 -1 3 -1 -2 -------------------------------------------------------------------------------- /output.txt: -------------------------------------------------------------------------------- 1 | 6 -1 2 -1 3 -1 #SUP: 2 2 | 5 -1 2 -1 3 -1 #SUP: 2 3 | 4 -1 3 -1 2 -1 #SUP: 2 4 | 1 2 -1 6 -1 #SUP: 2 5 | 1 -1 3 -1 3 -1 #SUP: 3 6 | 1 -1 2 -1 3 -1 #SUP: 2 7 | 5 -1 6 -1 3 -1 2 -1 #SUP: 2 8 | 5 -1 1 -1 3 -1 2 -1 #SUP: 2 9 | 1 2 -1 4 -1 3 -1 #SUP: 2 10 | 1 -1 2 3 -1 1 -1 #SUP: 2 11 | -------------------------------------------------------------------------------- /spmf.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fandu/maximal-sequential-patterns-mining/c7eae01942c42840512163ea0424d9e2d4317198/spmf.jar -------------------------------------------------------------------------------- /vmsp.py: 
# NO INTERNAL REFERENCE
"""Thin Python wrapper around the VMSP algorithm shipped in ``spmf.jar``.

VMSP mines maximal sequential patterns.  This module only launches the SPMF
command line through ``java`` and reads back the text file it produces.
"""
import subprocess


class Vmsp:
    """Launch SPMF's VMSP algorithm and read back the patterns it writes.

    Args:
        executable: Path to the SPMF jar handed to ``java -jar``.
        input_path: Path of the sequence database passed to SPMF.
        output_path: Path of the file SPMF is asked to write, and the file
            that :meth:`decode_output` reads.

    Calling ``Vmsp()`` with no arguments uses ``spmf.jar``, ``input.txt``
    and ``output.txt`` in the current working directory.
    """

    def __init__(self, executable="spmf.jar", input_path="input.txt",
                 output_path="output.txt"):
        self._executable = executable
        self._input = input_path
        self._output = output_path

    def run(self, min_supp=0.5):
        """Run VMSP on the configured input file.

        Args:
            min_supp: Minimum support threshold; it is converted with
                ``str()`` and passed as the last command-line argument.

        Returns:
            The exit status of the ``java`` process as reported by
            ``subprocess.call``.
        """
        # Equivalent shell command with the default settings:
        #   java -jar spmf.jar run VMSP input.txt output.txt 0.5
        command = [
            "java", "-jar", self._executable,
            "run", "VMSP",
            self._input, self._output,
            str(min_supp),
        ]
        return subprocess.call(command)

    def encode_input(self, data):
        """Placeholder for writing ``data`` to the input file.

        Not implemented yet: the argument is ignored and nothing is written.
        """
        pass

    def decode_output(self):
        """Parse the output file into a list of patterns.

        Every line is stripped and split on the ``" -1 "`` separator, so a
        line such as ``"1 2 -1 6 -1 #SUP: 2"`` becomes
        ``["1 2", "6", "#SUP: 2"]``.  The final element is whatever follows
        the last separator (the ``#SUP:`` annotation in SPMF output).

        Returns:
            A list with one list of strings per line of the output file.  If
            the file cannot be opened or read, ``"read_output error"`` is
            printed and an empty list is returned.
        """
        lines = []
        try:
            # Plain "r" already gives universal-newline handling on Python 3;
            # the "U" flag used previously was removed in Python 3.11.
            with open(self._output, "r") as f:
                lines = f.readlines()
        except (IOError, OSError):
            # Best effort: report the problem and fall through with no lines.
            print("read_output error")

        return [line.strip().split(" -1 ") for line in lines]


if __name__ == "__main__":
    vmsp = Vmsp()
    vmsp.encode_input([])
    vmsp.run()
    print(vmsp.decode_output())