├── modeling
│   ├── run_train.sh
│   ├── test_evaluate.py
│   └── train.py
├── preprocessing
│   ├── get_data.sh
│   └── get_train_data.py
├── requirments.txt
├── utils.py
├── inference.py
├── .gitignore
├── README.md
└── LICENSE
/modeling/run_train.sh:
--------------------------------------------------------------------------------
1 | CUDA_VISIBLE_DEVICES=0 python3 train.py
--------------------------------------------------------------------------------
/preprocessing/get_data.sh:
--------------------------------------------------------------------------------
1 | wget https://raw.githubusercontent.com/smilegate-ai/korean_smile_style_dataset/main/smilestyle_dataset.tsv
--------------------------------------------------------------------------------
/requirments.txt:
--------------------------------------------------------------------------------
1 | torch==1.13.1
2 | transformers==4.26.0
3 | pandas==1.5.3
4 | emoji==2.2.0
5 | soynlp==0.0.493
6 | datasets==2.10.1
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import re
2 | import emoji
3 | from soynlp.normalizer import repeat_normalize
4 |
# Runs of characters outside the allowed set (ASCII, Hangul jamo, Hangul
# syllables, plus "·" and "∼"). Raw string: the regex engine reads the \x escapes.
pattern = re.compile(r'[^ .,?!/@$%~%·∼()\x00-\x7Fㄱ-ㅣ가-힣]+')
# http(s) URLs.
url_pattern = re.compile(
    r'https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)')
# Runs of standalone Hangul consonant/vowel jamo. Inside a character class "|"
# is a literal, not alternation, so it is left out to avoid stripping pipes.
jamo_pattern = re.compile(r'[ㄱ-ㅎㅏ-ㅣ]+')
9 |
10 |
def clean(x):
    """Normalise a raw sentence before it is tokenised.

    Steps, in order: replace characters matched by ``pattern`` with a space,
    drop emoji, remove URLs, remove standalone jamo runs, strip surrounding
    whitespace, then pass the text through soynlp's ``repeat_normalize`` with
    ``num_repeats=2``.
    """
    text = pattern.sub(' ', x)
    text = emoji.replace_emoji(text, replace='')  # remove emoji
    for regex in (url_pattern, jamo_pattern):
        text = regex.sub('', text)
    return repeat_normalize(text.strip(), num_repeats=2)
19 |
--------------------------------------------------------------------------------
/modeling/test_evaluate.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from datasets import load_metric
3 | import numpy as np
4 | from pathlib import Path
5 |
6 | from transformers import AutoTokenizer, AutoModelForSequenceClassification
7 | import pandas as pd
8 |
9 | # device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
10 | device = 'cpu'
11 |
12 | BASE_DIR = Path(__file__).resolve().parent.parent
13 | latest_model_path = BASE_DIR.joinpath(
14 | 'modeling', 'saved_model', 'formal_classifier_latest')
15 |
16 |
class FormalClassifier(object):
    """Loads the fine-tuned classifier and predicts a label index per text."""

    def __init__(self):
        # Model weights come from the locally saved checkpoint directory; the
        # tokenizer is the one published with the base model.
        self.model = AutoModelForSequenceClassification.from_pretrained(
            latest_model_path).to(device)
        self.tokenizer = AutoTokenizer.from_pretrained('beomi/kcbert-base')

    def predict(self, text: str):
        """Return the argmax over the last logits dimension for *text*.

        The text is padded/truncated to 64 tokens. The result is a tensor of
        class indices, not a Python int.
        """
        inputs = self.tokenizer(
            text, return_tensors="pt", max_length=64, truncation=True, padding="max_length")
        model_inputs = {
            key: inputs[key].to(device)
            for key in ("input_ids", "token_type_ids", "attention_mask")
        }
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            logits = self.model(**model_inputs).logits
        return torch.argmax(logits, dim=-1)
36 |
37 |
if __name__ == '__main__':
    # Evaluate the saved classifier on the held-out test split.
    test = pd.read_csv(BASE_DIR.joinpath(
        'modeling', 'data', 'test.tsv'), sep='\t', index_col=0)
    test = test.dropna()

    metric = load_metric("accuracy")
    classifier = FormalClassifier()

    # predict() returns a one-element tensor; give the metric plain ints so
    # predictions and references have the same scalar form.
    predictions = [int(classifier.predict(text))
                   for text in test['sentence'].tolist()]
    print(metric.compute(predictions=predictions,
                         references=test['label'].tolist()))
52 |
--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
1 | import transformers
2 | import torch
3 | from pathlib import Path
4 |
5 | from transformers import AutoTokenizer, AutoModelForSequenceClassification
6 | from utils import clean
7 |
# Directory containing this file, kept as a string.
BASE_DIR = str(Path(__file__).resolve().parent)

# Joined with pathlib instead of string concatenation so the separator is
# right on every platform; still exposed as a plain string.
latest_model_path = str(
    Path(BASE_DIR, 'modeling', 'saved_model', 'formal_classifier_latest'))
device = 'cpu'
13 |
14 | # pipeline = transformers.pipeline(
15 | # "text-classification", model=model, tokenizer=tokenizer)
16 |
17 |
class FormalClassifier(object):
    """Scores how formal (honorific) a Korean sentence is.

    Index 1 of the softmax output is treated as the formal class and index 0
    as the informal class.
    """

    def __init__(self):
        # Model weights come from the locally saved checkpoint directory; the
        # tokenizer is the one published with the base model.
        self.model = AutoModelForSequenceClassification.from_pretrained(
            latest_model_path).to(device)
        self.tokenizer = AutoTokenizer.from_pretrained('beomi/kcbert-base')

    def predict(self, text: str):
        """Return softmax probabilities over the last logits dimension.

        The text is cleaned first, then padded/truncated to 64 tokens.
        """
        text = clean(text)
        inputs = self.tokenizer(
            text, return_tensors="pt", max_length=64, truncation=True, padding="max_length")
        model_inputs = {
            key: inputs[key].to(device)
            for key in ("input_ids", "token_type_ids", "attention_mask")
        }
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            logits = self.model(**model_inputs).logits
        return torch.softmax(logits, dim=-1)

    def is_formal(self, text):
        """Return True when the formal probability exceeds the informal one."""
        # One forward pass; both probabilities are read from the same result.
        probs = self.predict(text)[0]
        return bool(probs[1] > probs[0])

    def formal_percentage(self, text):
        """Return the formal-class probability rounded to two decimals."""
        return round(float(self.predict(text)[0][1]), 2)

    def print_message(self, text):
        """Print the verdict for *text* with its probability as a percentage.

        A score of exactly 0.5 is reported as informal, matching the strict
        comparison in ``is_formal``.
        """
        result = self.formal_percentage(text)
        # round() removes float artifacts such as 56.99999999999999.
        if result > 0.5:
            print(f'{text} : 존댓말입니다. ( 확률 {round(result * 100, 2)}% )')
        else:
            print(f'{text} : 반말입니다. ( 확률 {round((1 - result) * 100, 2)}% )')
54 |
55 |
if __name__ == "__main__":
    # Smoke run: one formal and one informal example sentence.
    classifier = FormalClassifier()
    for sample in (
        "저번에 교수님께서 자료 가져오라고 하셨는데 기억나세요?",
        "저번에 교수님이 자료 가져오라고 하셨는데 기억나?",
    ):
        classifier.print_message(sample)
60 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.tsv
2 | *.ckpt
3 | modeling/saved_model/
4 |
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | pip-wheel-metadata/
28 | share/python-wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .nox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | *.py,cover
55 | .hypothesis/
56 | .pytest_cache/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
99 | __pypackages__/
100 |
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 |
105 | # SageMath parsed files
106 | *.sage.py
107 |
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 |
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 |
121 | # Rope project settings
122 | .ropeproject
123 |
124 | # mkdocs documentation
125 | /site
126 |
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 |
132 | # Pyre type checker
133 | .pyre/
134 |
--------------------------------------------------------------------------------
/preprocessing/get_train_data.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import itertools
3 | import os
4 | from typing import Final, List
5 | from pathlib import Path
6 |
7 | smg_df = pd.read_csv("./meta/smilestyle_dataset.tsv", sep="\t")
8 | chat_df = pd.read_csv('./meta/aihub_sentiment_dataset.tsv', sep='\t')
9 |
10 | BASE_DIR = Path(__file__).resolve().parent.parent
11 | EXPORT_DIR = BASE_DIR.joinpath("modeling", "data")
12 |
13 |
def df2sentence(df: pd.DataFrame, cols: List[str]) -> List[str]:
    """Collect sentence fragments from the given columns of *df*.

    Cells are read column by column in the order of *cols*. Non-string cells
    (e.g. NaN) are skipped, each string is split on '.', fragments are
    stripped, and only fragments longer than 5 characters are kept.

    Args:
        df: Source frame.
        cols: Names of the columns to read.

    Returns:
        The surviving fragments, in column-then-row order.
    """
    cells = itertools.chain.from_iterable(df[col].tolist() for col in cols)
    fragments = (
        piece.strip()
        for cell in cells
        if isinstance(cell, str)
        for piece in cell.split('.')
    )
    # len > 5 also drops the empty fragments left by trailing periods.
    return [fragment for fragment in fragments if len(fragment) > 5]
23 |
24 |
formal_cols = ['formal', 'gentle']
informal_cols = ['informal', 'chat', 'enfp', 'sosim', 'choding', 'joongding']

smg_formal = df2sentence(smg_df, formal_cols)
smg_infomal = df2sentence(smg_df, informal_cols)

chat_formal = df2sentence(chat_df, ['시스템응답1', '시스템응답2', '시스템응답3', '시스템응답4'])
chat_informal = df2sentence(chat_df, ['사람문장1', '사람문장2', '사람문장3', '사람문장4'])

formal_data = smg_formal + chat_formal
informal_data = smg_infomal + chat_informal

# Labels: formal speech = 1, informal speech = 0.
data = pd.concat([pd.DataFrame({'sentence': informal_data, "label": 0}), pd.DataFrame(
    {'sentence': formal_data, "label": 1})])

# Remove duplicate sentences BEFORE splitting, so the same sentence cannot end
# up in more than one of train/dev/test.
data = data.drop_duplicates(subset=['sentence'], ignore_index=True)

# Shuffle; the fixed seed makes the split reproducible between runs.
data = data.sample(frac=1, random_state=42)
data.reset_index(drop=True, inplace=True)

split_rate: Final[float] = 0.1

# Number of rows in each of the dev and test splits.
range_ = int(len(data) * split_rate)

# Split; each part gets its own fresh 0..n-1 index.
dev = data[:range_].reset_index(drop=True)
test = data[range_:range_ * 2].reset_index(drop=True)
train = data[range_ * 2:].reset_index(drop=True)

# Create the export directory if needed (no exists-then-create race).
EXPORT_DIR.mkdir(parents=True, exist_ok=True)

# print("train label rate: ",train['label'].value_counts())
# print("dev label rate: ",dev['label'].value_counts())
# print("test label rate: ",test['label'].value_counts())

# Export the splits.
train.to_csv(EXPORT_DIR.joinpath("train.tsv"), sep="\t")
dev.to_csv(EXPORT_DIR.joinpath("dev.tsv"), sep="\t")
test.to_csv(EXPORT_DIR.joinpath("test.tsv"), sep="\t")
77 |
--------------------------------------------------------------------------------
/modeling/train.py:
--------------------------------------------------------------------------------
1 | from transformers import TrainingArguments
2 | from transformers import Trainer
3 | from datasets import load_metric
4 | import numpy as np
5 | from transformers import AutoTokenizer, AutoModelForSequenceClassification
6 | from datasets.dataset_dict import DatasetDict
7 | from datasets import Dataset
8 |
9 | import torch
10 | import pandas as pd
11 |
12 | from typing import Final
13 | from pathlib import Path
14 |
15 | # Base Model (108M)
16 |
17 | device = "cuda:0" if torch.cuda.is_available() else "cpu"
18 | print(device)
19 |
20 | BASE_DIR = Path(__file__).resolve().parent.parent
21 |
22 |
class FormalClassifier:
    """Fine-tunes beomi/kcbert-base as a formal/informal sentence classifier."""

    def __init__(self):
        self.model_name = "beomi/kcbert-base"
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForSequenceClassification.from_pretrained(
            self.model_name).to(device)

        self.batch_size: Final[int] = 32
        self.max_len: Final[int] = 64
        self.dataLoader()

    def tokenize_function(self, examples):
        """Tokenize the 'sentence' field, padded/truncated to ``max_len``."""
        return self.tokenizer(examples["sentence"], padding="max_length", truncation=True, max_length=self.max_len)

    def dataLoader(self):
        """Read the train/dev TSV files and store their tokenized datasets."""
        train = pd.read_csv(BASE_DIR.joinpath(
            'modeling', 'data', 'train.tsv'), sep='\t', index_col=0)
        dev = pd.read_csv(BASE_DIR.joinpath(
            'modeling', 'data', 'dev.tsv'), sep='\t', index_col=0)

        train = train.dropna()
        dev = dev.dropna()

        dataset = DatasetDict({
            'train': Dataset.from_dict({'sentence': train['sentence'].tolist(), 'label': train['label'].tolist()}),
            'dev': Dataset.from_dict({'sentence': dev['sentence'].tolist(), 'label': dev['label'].tolist()}),
        })

        tokenized_datasets = dataset.map(self.tokenize_function, batched=True)

        self.train_dataset = tokenized_datasets["train"]
        self.dev_dataset = tokenized_datasets["dev"]

    def compute_metrics(self, eval_pred):
        """Return accuracy of the argmax predictions against the labels."""
        metric = load_metric("accuracy")
        logits, labels = eval_pred
        predictions = np.argmax(logits, axis=-1)
        return metric.compute(predictions=predictions, references=labels)

    def train(self):
        """Train for two epochs, evaluate, and save the final model.

        Checkpoints go to ``<repo>/modeling/saved_model`` whatever the current
        working directory is. The final model is written to its
        ``formal_classifier_latest`` subdirectory.
        """
        output_dir = BASE_DIR.joinpath('modeling', 'saved_model')
        training_args = TrainingArguments(str(output_dir),
                                          per_device_train_batch_size=self.batch_size,
                                          num_train_epochs=2,
                                          learning_rate=3e-05,
                                          save_strategy="epoch",
                                          evaluation_strategy="epoch",
                                          # Mixed precision needs CUDA; the module
                                          # falls back to CPU when there is none.
                                          fp16=torch.cuda.is_available(),
                                          )

        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=self.train_dataset,
            eval_dataset=self.dev_dataset,
            compute_metrics=self.compute_metrics,
        )

        trainer.train()
        trainer.evaluate()
        # Persist the final weights under a stable directory name.
        trainer.save_model(str(output_dir.joinpath('formal_classifier_latest')))
82 |
83 |
if __name__ == "__main__":
    # Build the classifier (loads model and data) and start fine-tuning.
    FormalClassifier().train()
87 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # formal_classifier
2 | formal classifier or honorific classifier
3 |
4 | ## 한국어 존댓말 반말 분류기
5 |
6 | 오래전에 존댓말 , 반말을 한국어 형태소 분석기로 분류하는 간단한 방법을 소개했다.
7 | 하지만 이 방법을 실제로 적용하려 했더니, 많은 부분에서 오류가 발생하였다.
8 |
9 | 예를 들면)
10 | ```bash
11 | 저번에 교수님께서 자료 가져오라했는데 기억나?
12 | ```
13 | 라는 문구를 "께서"라는 존칭때문에 전체문장을 존댓말로 판단하는 오류가 많이 발생했다.
14 | 그래서 이번에 딥러닝 모델을 만들고 그 과정을 공유해보고자한다.
15 |
16 | #### 빠르게 가져다 쓰실 분들은 아래 코드로 바로 사용하실 수 있습니다.
17 | ```python
18 | from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
19 |
20 | model = AutoModelForSequenceClassification.from_pretrained("j5ng/kcbert-formal-classifier")
21 | tokenizer = AutoTokenizer.from_pretrained('j5ng/kcbert-formal-classifier')
22 |
23 | formal_classifier = pipeline(task="text-classification", model=model, tokenizer=tokenizer)
24 | print(formal_classifier("저번에 교수님께서 자료 가져오라했는데 기억나?"))
25 | # [{'label': 'LABEL_0', 'score': 0.9999139308929443}]
26 | ```
27 |
28 | #### Batch Inference Using Cuda
29 | ```python
30 | import torch
31 | from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
32 | from tqdm import tqdm
33 |
34 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
35 |
36 | model_name = "j5ng/kcbert-formal-classifier"
37 | tokenizer = AutoTokenizer.from_pretrained(model_name)
38 | model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
39 |
40 | formal_classifier = pipeline(
41 | task="text-classification",
42 | model=model,
43 | tokenizer=tokenizer,
44 | device=0 if torch.cuda.is_available() else -1,
45 | batch_size=128,
46 | )
47 |
48 | chunk_size = 1000 # 각 청크의 크기를 1000으로 설정
49 | chunks = [sentence[i:i+chunk_size] for i in range(0, len(sentence), chunk_size)] # 텍스트 리스트를 청크로 나눔
50 |
51 | scores = []
52 | for chunk in tqdm(chunks):
53 | batch_scores = formal_classifier(chunk)
54 | batch_scores = [round(1 - i['score'], 2) if i['label'] == 'LABEL_0' else round(i['score'],2) for i in batch_scores]
55 | scores.extend(batch_scores)
56 |
57 | # print(scores)
58 |
59 | ```
60 |
61 | ***
62 |
63 | ### 데이터 셋 출처
64 |
65 | #### 스마일게이트 말투 데이터 셋(korean SmileStyle Dataset)
66 | : https://github.com/smilegate-ai/korean_smile_style_dataset
67 |
68 | #### AI 허브 감성 대화 말뭉치
69 | : https://www.aihub.or.kr/
70 |
71 | #### 데이터셋 다운로드(AI허브는 직접다운로드만 가능)
72 | ```bash
73 | wget https://raw.githubusercontent.com/smilegate-ai/korean_smile_style_dataset/main/smilestyle_dataset.tsv
74 | ```
75 |
76 | ### 개발 환경
77 | ```bash
78 | Python3.9
79 | ```
80 |
81 | ```bash
82 | torch==1.13.1
83 | transformers==4.26.0
84 | pandas==1.5.3
85 | emoji==2.2.0
86 | soynlp==0.0.493
87 | datasets==2.10.1
89 | ```
90 |
91 |
92 | #### 사용 모델
93 | beomi/kcbert-base
94 | - GitHub : https://github.com/Beomi/KcBERT
95 | - HuggingFace : https://huggingface.co/beomi/kcbert-base
96 | ***
97 |
98 | ## 데이터
99 | ```bash
100 | get_train_data.py
101 | ```
102 |
103 | ### 예시
104 | |sentence|label|
105 | |------|---|
106 | |공부를 열심히 해도 열심히 한 만큼 성적이 잘 나오지 않아|0|
107 | |아들에게 보내는 문자를 통해 관계가 회복되길 바랄게요|1|
108 | |참 열심히 사신 보람이 있으시네요|1|
109 | |나도 스시 좋아함 이번 달부터 영국 갈 듯|0|
110 | |본부장님이 내가 할 수 없는 업무를 계속 주셔서 힘들어|0|
111 |
112 |
113 | ### 분포
114 | |label|train|test|
115 | |------|---|---|
116 | |0|133,430|34,908|
117 | |1|112,828|29,839|
118 |
119 | ***
120 |
121 | ## 학습(train)
122 | ```bash
123 | python3 modeling/train.py
124 | ```
125 |
126 | ***
127 |
128 | ## 예측(inference)
129 | ```bash
130 | python3 inference.py
131 | ```
132 |
133 | ```python
134 | def formal_percentage(self, text):
135 | return round(float(self.predict(text)[0][1]), 2)
136 |
137 | def print_message(self, text):
138 | result = self.formal_percentage(text)
139 | if result > 0.5:
140 | print(f'{text} : 존댓말입니다. ( 확률 {result*100}% )')
141 | if result < 0.5:
142 | print(f'{text} : 반말입니다. ( 확률 {((1 - result)*100)}% )')
143 | ```
144 |
145 | 결과
146 | ```
147 | 저번에 교수님께서 자료 가져오라하셨는데 기억나세요? : 존댓말입니다. ( 확률 99.19% )
148 | 저번에 교수님께서 자료 가져오라했는데 기억나? : 반말입니다. ( 확률 92.86% )
149 | ```
150 |
151 |
152 |
153 | ***
154 |
155 | ## 인용
156 | ```bash
157 | @misc{SmilegateAI2022KoreanSmileStyleDataset,
158 | title = {SmileStyle: Parallel Style-variant Corpus for Korean Multi-turn Chat Text Dataset},
159 | author = {Seonghyun Kim},
160 | year = {2022},
161 | howpublished = {\url{https://github.com/smilegate-ai/korean_smile_style_dataset}},
162 | }
163 | ```
164 |
165 | ```bash
166 | @inproceedings{lee2020kcbert,
167 | title={KcBERT: Korean Comments BERT},
168 | author={Lee, Junbum},
169 | booktitle={Proceedings of the 32nd Annual Conference on Human and Cognitive Language Technology},
170 | pages={437--440},
171 | year={2020}
172 | }
173 | ```
174 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------