├── .gitignore ├── Common.py ├── README.md ├── SG.py ├── User.py ├── load_data_from_file.py └── load_data_with_sqlalchemy.py /.gitignore: -------------------------------------------------------------------------------- 1 | # no way it couldn`t work! 2 | *.pyc 3 | *.md~ -------------------------------------------------------------------------------- /Common.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | """ 4 | from __future__ import print_function 5 | 6 | import sys 7 | import sqlalchemy 8 | from User import user_model 9 | 10 | 11 | def get_DB_engine(): 12 | from User import engine 13 | return engine 14 | 15 | 16 | def init_User_table(model=user_model): 17 | engine = get_DB_engine() 18 | if engine.has_table(model.__tablename__) == False: 19 | model.metadata.create_all(engine) 20 | sys.stdout.write(" table {0} init success!\n".format(model.__tablename__)) 21 | return True 22 | 23 | 24 | def get_DB_session(engine=get_DB_engine()): 25 | from sqlalchemy.orm import sessionmaker 26 | Session = sessionmaker(bind=engine) 27 | session = Session() 28 | return session 29 | 30 | 31 | def preConstruct(form_line): 32 | """ 33 | parse the line with '|',the first parameter is seperator 34 | """ 35 | sep,form = form_line.split(' ') 36 | columns = [col_name for col_name in form.split('|') if col_name != ''] 37 | return sep,columns 38 | 39 | 40 | def check_col_name(columns): 41 | table_columns = ['username','username_zh','password','email', 42 | 'identify_number','cell_phone','ip_addr','living_place','ignore'] 43 | for c in columns: 44 | if c not in table_columns: 45 | raise Exception('invalid column name {0}'.format(c)) 46 | return False 47 | return True 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SEDB 2 | 3 | *这是一个整理社工库的工具,定义了一套DSL语言,对于不同的格式只需要简单配置一行就可以自动化导入数据库,也支持整理成入库的SQL语句,从文件导入,速度更快,相对操作也较为繁琐。* 4 | 5 | ## 用法: 6 | ### User.py 建表 7 | 8 | ## 模板: 9 | 分隔符: 支持常见字符如SPACE(空格), | : 等,也可以自定义 10 | 11 | ## 数据行描述: 12 | 13 | ### 支持的数据格式(详见User.py) 14 | * username 15 | * username_zh 中文名字 16 | * passwd 明文密码 17 | * email 18 | * identify_number 身份证18位 19 | * cell_phone 手机号码 20 | * ip_addr ip地址 21 | * living_place 住址 22 | 23 | ### 数据格式描述示例 24 | 25 | *分隔符 数据描述(以'|'分割,ignore是需要忽略的数据项)* 26 | - SPACE username|password|cell_phone| 27 | - , username|password|identify_number|ignore|ignore|email 28 | 29 | ### **如果希望丰富数据项,可以修改User.py中的model定义,即可自动支持。** 30 | ## 示例用法: 31 | 32 | ### 模板内容 33 | * $ cat kuzi.tmpl 34 | * , username|password|ignore|ignore|cell_phone 35 | ### 数据格式 36 | * $ head kuzi.txt 37 | * user1,123456,nihao,10.0.1.1,13111110000 38 | * user2,123456,wtf,192.32.1.199,13111110001 39 | * ... 40 | 41 | ### 执行: 42 | * python SG.py kuzi.txt kuzi.tmpl 43 | 44 | #唰~~ -------------------------------------------------------------------------------- /SG.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | """ 4 | from __future__ import print_function 5 | 6 | import sys 7 | import sqlalchemy 8 | from User import user_model 9 | from Common import get_DB_engine,init_User_table,get_DB_session,preConstruct 10 | 11 | 12 | def gen_parse_class(f_name): 13 | """ 14 | """ 15 | # Read the first line 16 | with open(f_name) as f: 17 | sep,columns = preConstruct(f.readline()) 18 | 19 | if sep == 'SPACE': 20 | sep = ' ' 21 | columns = [c.strip() for c in columns] 22 | 23 | # check column name 24 | if not check_col_name(columns): raise Exception('Invalid column name!') 25 | 26 | # inner define function 27 | def parse_line_to_dic(self,line): 28 | """ 29 | The generated class __init__() 30 | """ 31 | row_elements = line.split(sep) 32 | row_elements = [r.strip() for r in row_elements] 33 | kargs = {} 34 | for ele_i in range(len(columns)): 35 | try: 36 | if columns[ele_i] == 'ignore': 37 | pass 38 | else: 39 | kargs[columns[ele_i]] = row_elements[ele_i] 40 | except Exception,e: 41 | sys.stdout.write(e) 42 | # do the init 43 | super(spec_user_model,self).__init__(**kargs) 44 | 45 | # inner define class 46 | spec_user_model = type('spec_user_model',(user_model,),{'__init__':parse_line_to_dic}) 47 | 48 | # return class like this: 49 | """ 50 | Class spec_user_model(user_model): 51 | def __init__(self,): 52 | row_elements = line.split(sep) 53 | kargs = {} 54 | 55 | for ele_i in range(len(row_elements)): 56 | kargs[ele_i] = row_elements[ele_i] 57 | # do the init 58 | super().__init__(**kargs) 59 | """ 60 | # usage(): 61 | # user = spec_user_model(username='user',password='pass',...) 62 | # session.commit(user) 63 | return spec_user_model 64 | 65 | 66 | # do one job 67 | def save_to_db(fname,user_class,single_file=False,lines_num=1000): 68 | """ 69 | """ 70 | sess = get_DB_session() 71 | #sess.autocommit = True 72 | with open(fname) as f: 73 | if single_file==True: 74 | f.readline() 75 | while True: 76 | line_n = 0 77 | for line in f: 78 | try: 79 | user = user_class(line) 80 | sess.add(user) 81 | line_n += 1 82 | 83 | if line_n % lines_num == 0: 84 | sess.commit() 85 | print("\r Processed {0} lines\r".format(line_n),file=sys.stdout,end=" ") 86 | sys.stdout.flush() 87 | except Exception,e: 88 | print("exception found :\n line:{0}\n content:{1} exception:{2}\n".format(line_n,line,e)) 89 | sess.commit() 90 | print("\n {0} datalines Finished\r".format(line_n),file=sys.stdout) 91 | break 92 | return True 93 | 94 | # do all dirty jobs 95 | def save_all_to_db(): 96 | init_User_table() 97 | if len(sys.argv) == 2: 98 | user_class = gen_parse_class(sys.argv[1]) 99 | save_to_db(sys.argv[1],user_class,single_file=True) 100 | elif len(sys.argv) == 3: 101 | user_class = gen_parse_class(sys.argv[2]) 102 | save_to_db(sys.argv[1],user_class,single_file=False) 103 | elif len(sys.argv) == 4: 104 | user_class = gen_parse_class(sys.argv[2]) 105 | save_to_db(sys.argv[1],user_class,single_file=True) 106 | else: 107 | raise Exception('Error argument number!') 108 | 109 | # build column line 110 | def build_col_line(columns): 111 | line = "" 112 | for c in columns: 113 | # init 114 | if line == '': 115 | line = '{0}'.format(c) 116 | continue 117 | # ignore case 118 | if c == 'ignore': 119 | line = "{0},@dummy".format(line) 120 | # append col 121 | else: 122 | line = "{0},{1}".format(line,c) 123 | return line 124 | 125 | # build sql string 126 | def build_sql_by_tmpl(source_file,template,single_file=False,model=user_model): 127 | """ 128 | """ 129 | if single_file == True: 130 | template = source_file 131 | ignore_lines = 'IGNORE 1 LINES' 132 | else: 133 | ignore_lines = '' 134 | 135 | with open(template) as f: 136 | sep,columns = preConstuct(f.readline()) 137 | 138 | if sep == 'SPACE': 139 | sep = ' ' 140 | columns = [c.strip() for c in columns] 141 | if not check_col_name(columns): return False 142 | 143 | col_line = build_col_line(columns) 144 | table_name = model.__tablename__ 145 | 146 | sql_query = """LOAD DATA LOCAL INFILE '{0}' 147 | INTO TABLE {1} 148 | FIELDS TERMINATED BY '{2}' 149 | LINES TERMINATED BY '\\r\\n' 150 | {3} 151 | ({4}); 152 | """.format( 153 | source_file, 154 | table_name, 155 | sep, 156 | ignore_lines, 157 | col_line 158 | ) 159 | return sql_query 160 | 161 | 162 | def do_load_data_from_file(): 163 | init_User_table() 164 | if len(sys.argv) == 2: 165 | do_load_data_from_file(sys.argv[1],sys.argv[1],single_file=True) 166 | elif len(sys.argv) == 3: 167 | do_load_data_from_file(sys.argv[1],sys.argv[2],single_file=False) 168 | elif len(sys.argv) == 4: 169 | do_load_data_from_file(sys.argv[1],sys.argv[2],single_file=True) 170 | else: 171 | raise Exception('Error argument number!') 172 | 173 | if __name__ == '__main__': 174 | save_all_to_db() 175 | -------------------------------------------------------------------------------- /User.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | create table if not exists `users` ( 4 | `id` int(10) unsigned not null auto_increment, 5 | `username` varchar(32) default '', 6 | `username_zh` varchar(32) default '', 7 | `password` varchar(32) default '', 8 | `email` varchar(64) default '', 9 | `identity_number` varchar(18) default '', 10 | `cell_phone` varchar(11) default '', 11 | `ip_addr` varchar(32) default '', 12 | `living_place` varchar(100) default '', 13 | PRIMARY KEY (`id`)) ENGINE=MyISAM DEFAULT CHARSET=utf8; 14 | 15 | alter table `users` ( 16 | `id` int(10) unsigned not null auto_increment, 17 | `username` varchar(32) default '', 18 | `username_zh` varchar(32) default '', 19 | `password` varchar(32) default '', 20 | `email` varchar(64) default '', 21 | `identity_number` varchar(18) default '', 22 | `cell_phone` varchar(11) default '', 23 | `ip_addr` varchar(32) default '', 24 | `living_place` varchar(100) default '', 25 | PRIMARY KEY (`id`)) ENGINE=MyISAM DEFAULT CHARSET=utf8; 26 | 27 | """ 28 | from sqlalchemy.ext.declarative import declarative_base 29 | from sqlalchemy import Column, Integer, String 30 | from sqlalchemy import create_engine 31 | 32 | Base = declarative_base() 33 | engine = create_engine('mysql://root:toor@127.0.0.1:3306/sdb?charset=utf8') 34 | 35 | class user_model(Base): 36 | __tablename__ = 'users' 37 | __table_args__ = {'mysql_engine':'MyISAM','mysql_charset':'utf8'} 38 | 39 | id = Column(Integer, primary_key=True,autoincrement=True) 40 | username = Column(String(32)) 41 | username_zh = Column(String(32)) 42 | password = Column(String(50)) 43 | email = Column(String(64)) 44 | identity_number = Column(String(18)) 45 | cell_phone = Column(String(11)) 46 | ip_addr = Column(String(32)) 47 | living_place = Column(String(100)) 48 | -------------------------------------------------------------------------------- /load_data_from_file.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | """ 4 | from __future__ import print_function 5 | 6 | import sys 7 | import sqlalchemy 8 | from User import user_model 9 | from Common import get_DB_engine,init_User_table,get_DB_session,preConstruct,check_col_name 10 | 11 | 12 | # build column line 13 | def build_col_line(columns): 14 | line = "" 15 | for c in columns: 16 | # init 17 | if line == '': 18 | if c == 'ignore': 19 | line = '{0}'.format('@dummy') 20 | else: 21 | line = '{0}'.format(c) 22 | continue 23 | # ignore case 24 | if c == 'ignore': 25 | line = "{0},@dummy".format(line) 26 | # append col 27 | else: 28 | line = "{0},{1}".format(line,c) 29 | return line 30 | 31 | 32 | # build sql string 33 | def build_sql_by_tmpl(source_file,template,single_file=False,model=user_model): 34 | """ 35 | """ 36 | if single_file == True: 37 | template = source_file 38 | ignore_lines = 'IGNORE 1 LINES' 39 | else: 40 | ignore_lines = '' 41 | 42 | with open(template) as f: 43 | sep,columns = preConstruct(f.readline()) 44 | 45 | if sep == 'SPACE': 46 | sep = ' ' 47 | columns = [c.strip() for c in columns] 48 | if not check_col_name(columns): return False 49 | 50 | col_line = build_col_line(columns) 51 | table_name = model.__tablename__ 52 | 53 | sql_query = """LOAD DATA LOCAL INFILE '{0}' 54 | INTO TABLE {1} 55 | FIELDS TERMINATED BY '{2}' 56 | OPTIONALLY ENCLOSED BY '\\'' 57 | LINES TERMINATED BY '\\r\\n' 58 | {3} 59 | ({4}); 60 | """.format( 61 | source_file, 62 | table_name, 63 | sep, 64 | ignore_lines, 65 | col_line 66 | ) 67 | return sql_query 68 | 69 | 70 | def do_exc_sql_loadfile(sql): 71 | sess = get_DB_engine() 72 | print(sql) 73 | #sess.execute(sql) 74 | 75 | def do_load_data_from_file(): 76 | import os 77 | pwd = os.getcwd() 78 | source_file = pwd+'/'+sys.argv[1] 79 | templ = pwd+'/'+sys.argv[2] 80 | 81 | init_User_table() 82 | if len(sys.argv) == 2: 83 | sql = build_sql_by_tmpl(source_file,templ,single_file=True) 84 | do_exc_sql_loadfile(sql) 85 | elif len(sys.argv) == 3: 86 | sql = build_sql_by_tmpl(source_file,templ,single_file=False) 87 | do_exc_sql_loadfile(sql) 88 | elif len(sys.argv) == 4: 89 | sql = build_sql_by_tmpl(source_file,templ,single_file=True) 90 | do_exc_sql_loadfile(sql) 91 | else: 92 | raise Exception('Error argument number!') 93 | 94 | 95 | if __name__ == '__main__': 96 | do_load_data_from_file() 97 | -------------------------------------------------------------------------------- /load_data_with_sqlalchemy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | """ 4 | from __future__ import print_function 5 | 6 | import sys 7 | import traceback 8 | import sqlalchemy 9 | from User import user_model 10 | from Common import get_DB_engine,init_User_table,get_DB_session,preConstruct,check_col_name 11 | 12 | 13 | def gen_parse_class(f_name): 14 | """ 15 | """ 16 | # Read the first line 17 | with open(f_name) as f: 18 | sep,columns = preConstruct(f.readline()) 19 | 20 | if sep == 'SPACE': 21 | sep = ' ' 22 | columns = [c.strip() for c in columns] 23 | 24 | # check column name 25 | if not check_col_name(columns): raise Exception('Invalid column name!') 26 | 27 | # inner define function 28 | def parse_line_to_dic(self,line): 29 | """ 30 | The generated class __init__() 31 | """ 32 | row_elements = line.split(sep) 33 | row_elements = [r.strip() for r in row_elements] 34 | kargs = {} 35 | for ele_i in range(len(columns)): 36 | try: 37 | if columns[ele_i] == 'ignore': 38 | pass 39 | else: 40 | kargs[columns[ele_i]] = row_elements[ele_i] 41 | except Exception,e: 42 | sys.stdout.write('\n {0} \r'.format(e)) 43 | #print(ele_i,columns,row_elements) 44 | #print(e) 45 | # do the init 46 | super(spec_user_model,self).__init__(**kargs) 47 | 48 | # inner define class 49 | spec_user_model = type('spec_user_model',(user_model,),{'__init__':parse_line_to_dic}) 50 | 51 | # return class like this: 52 | """ 53 | Class spec_user_model(user_model): 54 | def __init__(self,): 55 | row_elements = line.split(sep) 56 | kargs = {} 57 | 58 | for ele_i in range(len(row_elements)): 59 | kargs[ele_i] = row_elements[ele_i] 60 | # do the init 61 | super().__init__(**kargs) 62 | """ 63 | # usage(): 64 | # user = spec_user_model(username='user',password='pass',...) 65 | # session.commit(user) 66 | return spec_user_model 67 | 68 | # do one job 69 | def save_to_db(fname,user_class,single_file=False,lines_num=1000): 70 | """ 71 | save 72 | """ 73 | sess = get_DB_session() 74 | #sess.autocommit = True 75 | with open(fname) as f: 76 | if single_file==True: 77 | print(f.readline()) 78 | line_n = 0 79 | while True: 80 | for line in f: 81 | try: 82 | #print(line,type(line)) 83 | user = user_class(line) 84 | sess.add(user) 85 | line_n += 1 86 | 87 | if line_n % lines_num == 0: 88 | sess.commit() 89 | print("\r Processed {0} lines\r".format(line_n),file=sys.stdout,end=" ") 90 | sys.stdout.flush() 91 | except Exception,e: 92 | print("exception found :\n line:{0}\n content:{1} exception:{2}\n".format(line_n,line,e)) 93 | sess.commit() 94 | print("\n {0} datalines Finished\r".format(line_n),file=sys.stdout) 95 | break 96 | return True 97 | 98 | # do all dirty jobs 99 | def save_all_to_db(): 100 | init_User_table() 101 | import os 102 | pwd = os.getcwd() 103 | source_file = pwd+'/'+sys.argv[1] 104 | templ = pwd+'/'+sys.argv[2] 105 | 106 | if len(sys.argv) == 2: 107 | user_class = gen_parse_class(source_file) 108 | save_to_db(source_file,user_class,single_file=True) 109 | elif len(sys.argv) == 3: 110 | user_class = gen_parse_class(templ) 111 | save_to_db(source_file,user_class,single_file=False) 112 | elif len(sys.argv) == 4: 113 | user_class = gen_parse_class(templ) 114 | save_to_db(source_file,user_class,single_file=True) 115 | else: 116 | raise Exception('Error argument number!') 117 | 118 | 119 | if __name__ == '__main__': 120 | save_all_to_db() 121 | --------------------------------------------------------------------------------