├── modeling
│   ├── run_train.sh
│   ├── test_evaluate.py
│   └── train.py
├── preprocessing
│   ├── get_data.sh
│   └── get_train_data.py
├── requirments.txt
├── utils.py
├── inference.py
├── .gitignore
├── README.md
└── LICENSE


/modeling/run_train.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=0 python3 train.py
--------------------------------------------------------------------------------
/preprocessing/get_data.sh:
--------------------------------------------------------------------------------
1 | wget https://raw.githubusercontent.com/smilegate-ai/korean_smile_style_dataset/main/smilestyle_dataset.tsv
--------------------------------------------------------------------------------
/requirments.txt:
--------------------------------------------------------------------------------
1 | torch==1.13.1
2 | transformers==4.26.0
3 | pandas==1.5.3
4 | emoji==2.2.0
5 | soynlp==0.0.493
6 | datasets==2.10.1
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import re
2 | import emoji
3 | from soynlp.normalizer import repeat_normalize
4 | 
5 | # Keep spaces, the listed punctuation, ASCII, and Hangul; drop everything else.
6 | pattern = re.compile(r'[^ .,?!/@$%~·∼()\x00-\x7Fㄱ-ㅣ가-힣]+')
7 | url_pattern = re.compile(
8 |     r'https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)')
9 | # Standalone jamo runs (ㅋㅋㅋ, ㅠㅠ, ...) that are noise rather than words.
10 | jamo_pattern = re.compile('[ㄱ-ㅎㅏ-ㅣ]+')
11 | 
12 | 
13 | def clean(x):
14 |     x = pattern.sub(' ', x)
15 |     x = emoji.replace_emoji(x, replace='')  # remove emojis
16 |     x = url_pattern.sub('', x)
17 |     x = jamo_pattern.sub('', x)
18 |     x = x.strip()
19 |     x = repeat_normalize(x, num_repeats=2)
20 |     return x
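21 | 
22 | 
23 | # Illustrative usage (an added example, not part of the original file):
24 | # clean() strips the stray jamo, the emoji, and the URL, then caps
25 | # character repetitions at two.
26 | if __name__ == '__main__':
27 |     print(clean('이거 진짜 좋아요요요요 ㅋㅋㅋ 👍 https://example.com'))  # -> '이거 진짜 좋아요요'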
--------------------------------------------------------------------------------
/modeling/test_evaluate.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from datasets import load_metric
3 | from pathlib import Path
4 | 
5 | from transformers import AutoTokenizer, AutoModelForSequenceClassification
6 | import pandas as pd
7 | 
8 | # device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
9 | device = 'cpu'
10 | 
11 | BASE_DIR = Path(__file__).resolve().parent.parent
12 | latest_model_path = BASE_DIR.joinpath(
13 |     'modeling', 'saved_model', 'formal_classifier_latest')
14 | 
15 | 
16 | class FormalClassifier(object):
17 |     def __init__(self):
18 |         self.model = AutoModelForSequenceClassification.from_pretrained(
19 |             latest_model_path).to(device)
20 |         self.tokenizer = AutoTokenizer.from_pretrained('beomi/kcbert-base')
21 | 
22 |     def predict(self, text: str):
23 |         inputs = self.tokenizer(
24 |             text, return_tensors="pt", max_length=64, truncation=True, padding="max_length")
25 |         input_ids = inputs["input_ids"].to(device)
26 |         token_type_ids = inputs["token_type_ids"].to(device)
27 |         attention_mask = inputs["attention_mask"].to(device)
28 | 
29 |         model_inputs = {
30 |             "input_ids": input_ids,
31 |             "token_type_ids": token_type_ids,
32 |             "attention_mask": attention_mask,
33 |         }
34 |         # Predicted class index: 1 = formal (존댓말), 0 = informal (반말)
35 |         return torch.argmax(self.model(**model_inputs).logits, dim=-1)
36 | 
37 | 
38 | if __name__ == '__main__':
39 | 
40 |     test = pd.read_csv(BASE_DIR.joinpath(
41 |         'modeling', 'data', 'test.tsv'), sep='\t', index_col=0)
42 | 
43 |     test = test.dropna()
44 | 
45 |     metric = load_metric("accuracy")
46 |     classifier = FormalClassifier()
47 | 
48 |     # .item() unwraps each one-element tensor so the metric gets plain ints
49 |     predictions = [classifier.predict(text).item()
50 |                    for text in test['sentence'].tolist()]
51 |     print(metric.compute(predictions=predictions,
52 |                          references=test['label'].tolist()))
53 | 
--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from pathlib import Path
3 | 
4 | from transformers import AutoTokenizer, AutoModelForSequenceClassification
5 | from utils import clean
6 | 
7 | BASE_DIR = str(Path(__file__).resolve().parent)
8 | 
9 | latest_model_path = BASE_DIR + '/modeling/saved_model/formal_classifier_latest'
10 | device = 'cpu'
11 | 
12 | 
13 | class FormalClassifier(object):
14 |     def __init__(self):
15 |         self.model = AutoModelForSequenceClassification.from_pretrained(
16 |             latest_model_path).to(device)
17 |         self.tokenizer = AutoTokenizer.from_pretrained('beomi/kcbert-base')
18 | 
19 |     def predict(self, text: str):
20 |         text = clean(text)
21 |         inputs = self.tokenizer(
22 |             text, return_tensors="pt", max_length=64, truncation=True, padding="max_length")
23 |         input_ids = inputs["input_ids"].to(device)
24 |         token_type_ids = inputs["token_type_ids"].to(device)
25 |         attention_mask = inputs["attention_mask"].to(device)
26 | 
27 |         model_inputs = {
28 |             "input_ids": input_ids,
29 |             "token_type_ids": token_type_ids,
30 |             "attention_mask": attention_mask,
31 |         }
32 |         # Softmax over the two logits: index 1 = formal (존댓말), index 0 = informal (반말)
33 |         return torch.softmax(self.model(**model_inputs).logits, dim=-1)
34 | 
35 |     def is_formal(self, text):
36 |         probs = self.predict(text)[0]  # predict once, compare both classes
37 |         return bool(probs[1] > probs[0])
38 | 
39 |     def formal_percentage(self, text):
40 |         return round(float(self.predict(text)[0][1]), 2)
41 | 
42 |     def print_message(self, text):
43 |         result = self.formal_percentage(text)
44 |         if result >= 0.5:
45 |             print(f'{text} : 존댓말입니다. ( 확률 {result*100}% )')  # formal
46 |         else:
47 |             print(f'{text} : 반말입니다. ( 확률 {((1 - result)*100)}% )')  # informal
48 | 
49 | 
50 | if __name__ == "__main__":
51 |     classifier = FormalClassifier()
52 |     classifier.print_message("저번에 교수님께서 자료 가져오라고 하셨는데 기억나세요?")
53 |     classifier.print_message("저번에 교수님이 자료 가져오라고 하셨는데 기억나?")
54 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.tsv
2 | *.ckpt
3 | modeling/saved_model/
4 | 
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 | 
10 | # C extensions
11 | *.so
12 | 
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | pip-wheel-metadata/
28 | share/python-wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 | 
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 | 
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 | 
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .nox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | *.py,cover
55 | .hypothesis/
56 | .pytest_cache/
57 | 
58 | # Translations
59 | *.mo
60 | *.pot
61 | 
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 | 
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 | 
72 | # Scrapy stuff:
73 | .scrapy
74 | 
75 | # Sphinx documentation
76 | docs/_build/
77 | 
78 | # PyBuilder
79 | target/
80 | 
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 | 
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 | 
88 | # pyenv
89 | .python-version
90 | 
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 | 
98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
99 | __pypackages__/
100 | 
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 | 
105 | # SageMath parsed files
106 | *.sage.py
107 | 
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 | 
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 | 
121 | # Rope project settings
122 | .ropeproject
123 | 
124 | # mkdocs documentation
125 | /site
126 | 
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 | 
132 | # Pyre type checker
133 | .pyre/
134 | 
--------------------------------------------------------------------------------
/preprocessing/get_train_data.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import itertools
3 | import os
4 | from typing import Final, List
5 | from pathlib import Path
6 | 
7 | smg_df = pd.read_csv("./meta/smilestyle_dataset.tsv", sep="\t")
8 | chat_df = pd.read_csv('./meta/aihub_sentiment_dataset.tsv', sep='\t')
9 | 
10 | BASE_DIR = Path(__file__).resolve().parent.parent
11 | EXPORT_DIR = BASE_DIR.joinpath("modeling", "data")
12 | 
13 | 
14 | def df2sentence(df: pd.DataFrame, cols: List[str]) -> List[str]:
15 |     # Flatten the style columns into one list, split on '.', and keep
16 |     # non-empty sentences longer than five characters.
17 |     sentence = [df[col].tolist() for col in cols]
18 |     sentence = list(itertools.chain(*sentence))
19 |     sentence = [s for s in sentence if isinstance(s, str)]
20 |     sentence = [s.split('.') for s in sentence]
21 |     sentence = list(itertools.chain(*sentence))
22 |     sentence = [s.strip() for s in sentence if s.strip()]
23 |     sentence = [s for s in sentence if len(s) > 5]
24 |     return sentence
25 | 
26 | 
27 | formal_cols = ['formal', 'gentle']
28 | informal_cols = ['informal', 'chat', 'enfp', 'sosim', 'choding', 'joongding']
29 | 
30 | smg_formal = df2sentence(smg_df, formal_cols)
31 | smg_informal = df2sentence(smg_df, informal_cols)
32 | 
33 | chat_formal = df2sentence(chat_df, ['시스템응답1', '시스템응답2', '시스템응답3', '시스템응답4'])
34 | chat_informal = df2sentence(chat_df, ['사람문장1', '사람문장2', '사람문장3', '사람문장4'])
35 | 
36 | formal_data = smg_formal + chat_formal
37 | informal_data = smg_informal + chat_informal
38 | 
39 | # Labels: formal (존댓말) = 1, informal (반말) = 0
40 | data = pd.concat([pd.DataFrame({'sentence': informal_data, "label": 0}), pd.DataFrame(
41 |     {'sentence': formal_data, "label": 1})])
42 | 
43 | # Tokenization (left commented out; the BERT tokenizer is applied at training time)
44 | # tokenizer = PeCab()
45 | # data['sentence'] = data['sentence'].apply(lambda x: tokenizer.tokenize(x))
46 | 
47 | # Shuffle
48 | data = data.sample(frac=1)
49 | data.reset_index(drop=True, inplace=True)
50 | 
51 | split_rate: Final[float] = 0.1
52 | 
53 | # Size of one held-out split: 10% dev, 10% test, 80% train
54 | range_ = int(len(data) * split_rate)
55 | 
56 | # Split the data
57 | dev = data[:range_]
58 | test = data[range_:range_ * 2]
59 | train = data[range_ * 2:]
60 | 
61 | 
62 | # Drop duplicate sentences (reassignment instead of inplace=True, which
63 | # would raise SettingWithCopyWarning on these slices)
64 | train = train.drop_duplicates(subset=['sentence'], ignore_index=True)
65 | test = test.drop_duplicates(subset=['sentence'], ignore_index=True)
66 | dev = dev.drop_duplicates(subset=['sentence'], ignore_index=True)
67 | 
68 | 
69 | if not os.path.exists(EXPORT_DIR):
70 |     os.makedirs(EXPORT_DIR)
71 | 
72 | # print("train label rate: ", train['label'].value_counts())
73 | # print("dev label rate: ", dev['label'].value_counts())
74 | # print("test label rate: ", test['label'].value_counts())
75 | 
76 | # Export the splits
77 | train.to_csv(EXPORT_DIR.joinpath("train.tsv"), sep="\t")
78 | dev.to_csv(EXPORT_DIR.joinpath("dev.tsv"), sep="\t")
79 | test.to_csv(EXPORT_DIR.joinpath("test.tsv"), sep="\t")
--------------------------------------------------------------------------------
/modeling/train.py:
--------------------------------------------------------------------------------
1 | from transformers import TrainingArguments
2 | from transformers import Trainer
3 | from datasets import load_metric
4 | import numpy as np
5 | from transformers import AutoTokenizer, AutoModelForSequenceClassification
6 | from datasets.dataset_dict import DatasetDict
7 | from datasets import Dataset
8 | 
9 | import torch
10 | import pandas as pd
11 | 
12 | from typing import Final
13 | from pathlib import Path
14 | 
15 | # Base model: beomi/kcbert-base (108M parameters)
16 | 
17 | device = "cuda:0" if torch.cuda.is_available() else "cpu"
18 | print(device)
19 | 
20 | BASE_DIR = Path(__file__).resolve().parent.parent
21 | 
22 | 
23 | class FormalClassifier:
24 |     def __init__(self):
25 |         self.model_name = "beomi/kcbert-base"
26 |         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
27 |         self.model = AutoModelForSequenceClassification.from_pretrained(
28 |             self.model_name).to(device)
29 | 
30 |         self.batch_size: Final[int] = 32
31 |         self.max_len: Final[int] = 64
32 |         self.dataLoader()
33 | 
34 |     def tokenize_function(self, examples):
35 |         return self.tokenizer(examples["sentence"], padding="max_length", truncation=True, max_length=self.max_len)
36 | 
37 |     def dataLoader(self):
38 |         train = pd.read_csv(BASE_DIR.joinpath(
39 |             'modeling', 'data', 'train.tsv'), sep='\t', index_col=0)
40 |         dev = pd.read_csv(BASE_DIR.joinpath(
41 |             'modeling', 'data', 'dev.tsv'), sep='\t', index_col=0)
42 | 
43 |         train = train.dropna()
44 |         dev = dev.dropna()
45 | 
46 |         dataset = DatasetDict({
47 |             'train': Dataset.from_dict({'sentence': train['sentence'].tolist(), 'label': train['label'].tolist()}),
48 |             'dev': Dataset.from_dict({'sentence': dev['sentence'].tolist(), 'label': dev['label'].tolist()}),
49 |         })
50 | 
51 |         tokenized_datasets = dataset.map(self.tokenize_function, batched=True)
52 | 
53 |         self.train_dataset = tokenized_datasets["train"]
54 |         self.dev_dataset = tokenized_datasets["dev"]
55 | 
56 |     def compute_metrics(self, eval_pred):
57 |         metric = load_metric("accuracy")
58 |         logits, labels = eval_pred
59 |         predictions = np.argmax(logits, axis=-1)
60 |         return metric.compute(predictions=predictions, references=labels)
61 | 
62 |     def train(self):
63 |         training_args = TrainingArguments("./saved_model",
64 |                                           per_device_train_batch_size=self.batch_size,
65 |                                           num_train_epochs=2,
66 |                                           learning_rate=3e-05,
67 |                                           save_strategy="epoch",
68 |                                           evaluation_strategy="epoch",
69 |                                           # fp16 needs a CUDA GPU; remove this line to train on CPU
70 |                                           fp16=True,
71 |                                           )
72 | 
73 |         trainer = Trainer(
74 |             model=self.model,
75 |             args=training_args,
76 |             train_dataset=self.train_dataset,
77 |             eval_dataset=self.dev_dataset,
78 |             compute_metrics=self.compute_metrics,
79 |         )
80 | 
81 |         trainer.train()
82 |         trainer.evaluate()
83 | 
84 | 
85 | if __name__ == "__main__":
86 |     model = FormalClassifier()
87 |     model.train()
88 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # formal_classifier
2 | formal classifier or honorific classifier
3 | 
4 | ## Korean formal/informal (존댓말/반말) speech classifier
5 | 
6 | A while back, I introduced a simple way to classify formal and informal Korean speech with a morphological analyzer.
7 | When I actually tried to apply that approach, though, it failed in many cases.
8 | 
9 | For example:
10 | ```bash
11 | 저번에 교수님께서 자료 가져오라했는데 기억나?
12 | ```
13 | This sentence is informal, yet because of the honorific particle "께서" it was frequently judged formal as a whole.
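14 | 
15 | To make the failure concrete, here is a minimal sketch of that kind of rule-based check (written for illustration here, not the original analyzer code): any honorific cue marks the whole sentence as formal, so "께서" alone is enough to fool it.
16 | 
17 | ```python
18 | # Naive rule: call the sentence "formal" if any honorific marker appears.
19 | HONORIFIC_MARKERS = ("께서", "습니다", "세요")
20 | 
21 | def is_formal_naive(sentence: str) -> bool:
22 |     return any(marker in sentence for marker in HONORIFIC_MARKERS)
23 | 
24 | print(is_formal_naive("저번에 교수님께서 자료 가져오라했는데 기억나?"))  # True, which is wrong
25 | ```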
26 | 
27 | So this time I built a deep learning model instead, and I'd like to share the process.
28 | 
29 | #### If you just want to use the model right away, the code below is all you need.
30 | ```python
31 | from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
32 | 
33 | model = AutoModelForSequenceClassification.from_pretrained("j5ng/kcbert-formal-classifier")
34 | tokenizer = AutoTokenizer.from_pretrained('j5ng/kcbert-formal-classifier')
35 | 
36 | formal_classifier = pipeline(task="text-classification", model=model, tokenizer=tokenizer)
37 | # LABEL_0 = informal (반말), LABEL_1 = formal (존댓말)
38 | print(formal_classifier("저번에 교수님께서 자료 가져오라했는데 기억나?"))
39 | # [{'label': 'LABEL_0', 'score': 0.9999139308929443}]
40 | ```
41 | 
42 | #### Batch Inference Using CUDA
43 | ```python
44 | import torch
45 | from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
46 | from tqdm import tqdm
47 | 
48 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
49 | 
50 | model_name = "j5ng/kcbert-formal-classifier"
51 | tokenizer = AutoTokenizer.from_pretrained(model_name)
52 | model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
53 | 
54 | formal_classifier = pipeline(
55 |     task="text-classification",
56 |     model=model,
57 |     tokenizer=tokenizer,
58 |     device=0 if torch.cuda.is_available() else -1,
59 |     batch_size=128,
60 | )
61 | 
62 | sentence = ["점심 드셨어요?", "밥 먹었어?"]  # placeholder: put your own list of sentences here
63 | 
64 | chunk_size = 1000  # process the list 1,000 sentences at a time
65 | chunks = [sentence[i:i+chunk_size] for i in range(0, len(sentence), chunk_size)]
66 | 
67 | # Convert every result to the probability of being formal (LABEL_1).
68 | scores = []
69 | for chunk in tqdm(chunks):
70 |     batch_scores = formal_classifier(chunk)
71 |     batch_scores = [round(1 - i['score'], 2) if i['label'] == 'LABEL_0' else round(i['score'], 2) for i in batch_scores]
72 |     scores.extend(batch_scores)
73 | 
74 | # print(scores)
75 | ```
76 | 
77 | ***
78 | 
79 | ### Dataset sources
80 | 
81 | #### Smilegate AI speech-style dataset (Korean SmileStyle Dataset)
82 | : https://github.com/smilegate-ai/korean_smile_style_dataset
83 | 
84 | #### AI Hub sentiment dialogue corpus (감성 대화 말뭉치)
85 | : https://www.aihub.or.kr/
86 | 
87 | #### Downloading the datasets (the AI Hub corpus supports manual download only)
88 | ```bash
89 | wget https://raw.githubusercontent.com/smilegate-ai/korean_smile_style_dataset/main/smilestyle_dataset.tsv
90 | ```
91 | 
92 | ### Environment
93 | ```bash
94 | Python 3.9
95 | ```
96 | 
97 | ```bash
98 | torch==1.13.1
99 | transformers==4.26.0
100 | pandas==1.5.3
101 | emoji==2.2.0
102 | soynlp==0.0.493
103 | datasets==2.10.1
104 | ```
105 | 
106 | 
107 | #### Model
108 | beomi/kcbert-base
109 | - GitHub : https://github.com/Beomi/KcBERT
110 | - HuggingFace : https://huggingface.co/beomi/kcbert-base
111 | ***
112 | 
113 | ## Data
114 | ```bash
115 | python3 get_train_data.py
116 | ```
117 | 
118 | ### Examples
119 | |sentence|label|
120 | |------|---|
121 | |공부를 열심히 해도 열심히 한 만큼 성적이 잘 나오지 않아|0|
122 | |아들에게 보내는 문자를 통해 관계가 회복되길 바랄게요|1|
123 | |참 열심히 사신 보람이 있으시네요|1|
124 | |나도 스시 좋아함 이번 달부터 영국 갈 듯|0|
125 | |본부장님이 내가 할 수 없는 업무를 계속 주셔서 힘들어|0|
126 | 
127 | 
128 | ### Label distribution
129 | |label|train|test|
130 | |------|---|---|
131 | |0|133,430|34,908|
132 | |1|112,828|29,839|
133 | 
134 | ***
135 | 
136 | ## Training
137 | ```bash
138 | python3 modeling/train.py
139 | ```
140 | 
141 | ***
142 | 
143 | ## Inference
144 | ```bash
145 | python3 inference.py
146 | ```
147 | 
148 | ```python
149 | def formal_percentage(self, text):
150 |     return round(float(self.predict(text)[0][1]), 2)
151 | 
152 | def print_message(self, text):
153 |     result = self.formal_percentage(text)
154 |     if result >= 0.5:
155 |         print(f'{text} : 존댓말입니다. ( 확률 {result*100}% )')
156 |     else:
157 |         print(f'{text} : 반말입니다. ( 확률 {((1 - result)*100)}% )')
158 | ```
159 | 
160 | Result:
161 | ```
162 | 저번에 교수님께서 자료 가져오라하셨는데 기억나세요? : 존댓말입니다. ( 확률 99.19% )
163 | 저번에 교수님께서 자료 가져오라했는데 기억나? : 반말입니다. ( 확률 92.86% )
164 | ```
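165 | 
166 | ***
167 | 
168 | ## Evaluation
169 | 
170 | To measure accuracy on the held-out test split, `modeling/test_evaluate.py` can be run from the repository root (a suggested invocation; it assumes training has produced `modeling/saved_model/formal_classifier_latest` and that `modeling/data/test.tsv` exists):
171 | ```bash
172 | python3 modeling/test_evaluate.py
173 | ```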
174 | 
175 | ***
176 | 
177 | ## Citation
178 | ```bibtex
179 | @misc{SmilegateAI2022KoreanSmileStyleDataset,
180 |   title = {SmileStyle: Parallel Style-variant Corpus for Korean Multi-turn Chat Text Dataset},
181 |   author = {Seonghyun Kim},
182 |   year = {2022},
183 |   howpublished = {\url{https://github.com/smilegate-ai/korean_smile_style_dataset}},
184 | }
185 | ```
186 | 
187 | ```bibtex
188 | @inproceedings{lee2020kcbert,
189 |   title={KcBERT: Korean Comments BERT},
190 |   author={Lee, Junbum},
191 |   booktitle={Proceedings of the 32nd Annual Conference on Human and Cognitive Language Technology},
192 |   pages={437--440},
193 |   year={2020}
194 | }
195 | ```
196 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |                                  Apache License
2 |                            Version 2.0, January 2004
3 |                         http://www.apache.org/licenses/
4 | 
5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 |    1. Definitions.
8 | 
9 |       "License" shall mean the terms and conditions for use, reproduction,
10 |       and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 |       "Licensor" shall mean the copyright owner or entity authorized by
13 |       the copyright owner that is granting the License.
14 | 
15 |       "Legal Entity" shall mean the union of the acting entity and all
16 |       other entities that control, are controlled by, or are under common
17 |       control with that entity. For the purposes of this definition,
18 |       "control" means (i) the power, direct or indirect, to cause the
19 |       direction or management of such entity, whether by contract or
20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 |       outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 |       "You" (or "Your") shall mean an individual or Legal Entity
24 |       exercising permissions granted by this License.
25 | 
26 |       "Source" form shall mean the preferred form for making modifications,
27 |       including but not limited to software source code, documentation
28 |       source, and configuration files.
29 | 
30 |       "Object" form shall mean any form resulting from mechanical
31 |       transformation or translation of a Source form, including but
32 |       not limited to compiled object code, generated documentation,
33 |       and conversions to other media types.
34 | 
35 |       "Work" shall mean the work of authorship, whether in Source or
36 |       Object form, made available under the License, as indicated by a
37 |       copyright notice that is included in or attached to the work
38 |       (an example is provided in the Appendix below).
39 | 
40 |       "Derivative Works" shall mean any work, whether in Source or Object
41 |       form, that is based on (or derived from) the Work and for which the
42 |       editorial revisions, annotations, elaborations, or other modifications
43 |       represent, as a whole, an original work of authorship. For the purposes
44 |       of this License, Derivative Works shall not include works that remain
45 |       separable from, or merely link (or bind by name) to the interfaces of,
46 |       the Work and Derivative Works thereof.
47 | 
48 |       "Contribution" shall mean any work of authorship, including
49 |       the original version of the Work and any modifications or additions
50 |       to that Work or Derivative Works thereof, that is intentionally
51 |       submitted to Licensor for inclusion in the Work by the copyright owner
52 |       or by an individual or Legal Entity authorized to submit on behalf of
53 |       the copyright owner. For the purposes of this definition, "submitted"
54 |       means any form of electronic, verbal, or written communication sent
55 |       to the Licensor or its representatives, including but not limited to
56 |       communication on electronic mailing lists, source code control systems,
57 |       and issue tracking systems that are managed by, or on behalf of, the
58 |       Licensor for the purpose of discussing and improving the Work, but
59 |       excluding communication that is conspicuously marked or otherwise
60 |       designated in writing by the copyright owner as "Not a Contribution."
61 | 
62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
63 |       on behalf of whom a Contribution has been received by Licensor and
64 |       subsequently incorporated within the Work.
65 | 
66 |    2. Grant of Copyright License. Subject to the terms and conditions of
67 |       this License, each Contributor hereby grants to You a perpetual,
68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 |       copyright license to reproduce, prepare Derivative Works of,
70 |       publicly display, publicly perform, sublicense, and distribute the
71 |       Work and such Derivative Works in Source or Object form.
72 | 
73 |    3. Grant of Patent License. Subject to the terms and conditions of
74 |       this License, each Contributor hereby grants to You a perpetual,
75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 |       (except as stated in this section) patent license to make, have made,
77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
78 |       where such license applies only to those patent claims licensable
79 |       by such Contributor that are necessarily infringed by their
80 |       Contribution(s) alone or by combination of their Contribution(s)
81 |       with the Work to which such Contribution(s) was submitted. If You
82 |       institute patent litigation against any entity (including a
83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
84 |       or a Contribution incorporated within the Work constitutes direct
85 |       or contributory patent infringement, then any patent licenses
86 |       granted to You under this License for that Work shall terminate
87 |       as of the date such litigation is filed.
88 | 
89 |    4. Redistribution. You may reproduce and distribute copies of the
90 |       Work or Derivative Works thereof in any medium, with or without
91 |       modifications, and in Source or Object form, provided that You
92 |       meet the following conditions:
93 | 
94 |       (a) You must give any other recipients of the Work or
95 |           Derivative Works a copy of this License; and
96 | 
97 |       (b) You must cause any modified files to carry prominent notices
98 |           stating that You changed the files; and
99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!) The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 
--------------------------------------------------------------------------------