├── README.md
├── evaluation
│   ├── amazon
│   │   ├── amazon_vocab.json
│   │   ├── compare
│   │   │   ├── .ipynb_checkpoints
│   │   │   │   └── generalization_eval_new-checkpoint.ipynb
│   │   │   ├── amazon
│   │   │   │   ├── .ipynb_checkpoints
│   │   │   │   │   ├── sentiment.test.0-checkpoint.B_GST
│   │   │   │   │   ├── sentiment.test.0-checkpoint.CrossAligned
│   │   │   │   │   ├── sentiment.test.0-checkpoint.DeleteAndRetrieve
│   │   │   │   │   ├── sentiment.test.0-checkpoint.DeleteOnly
│   │   │   │   │   ├── sentiment.test.0-checkpoint.G_GST
│   │   │   │   │   ├── sentiment.test.0-checkpoint.human
│   │   │   │   │   ├── sentiment.test.0-checkpoint.input_copy
│   │   │   │   │   └── sentiment.test.1-checkpoint.DeleteOnly
│   │   │   │   ├── sentiment.test.0.B_GST
│   │   │   │   ├── sentiment.test.0.CrossAligned
│   │   │   │   ├── sentiment.test.0.DeleteAndRetrieve
│   │   │   │   ├── sentiment.test.0.DeleteOnly
│   │   │   │   ├── sentiment.test.0.G_GST
│   │   │   │   ├── sentiment.test.0.RetrieveOnly
│   │   │   │   ├── sentiment.test.0.StyleEmbedding
│   │   │   │   ├── sentiment.test.0.TemplateBased
│   │   │   │   ├── sentiment.test.0.human
│   │   │   │   ├── sentiment.test.0.input_copy
│   │   │   │   ├── sentiment.test.0.multi_decoder
│   │   │   │   ├── sentiment.test.1.B_GST
│   │   │   │   ├── sentiment.test.1.CrossAligned
│   │   │   │   ├── sentiment.test.1.DeleteAndRetrieve
│   │   │   │   ├── sentiment.test.1.DeleteOnly
│   │   │   │   ├── sentiment.test.1.G_GST
│   │   │   │   ├── sentiment.test.1.RetrieveOnly
│   │   │   │   ├── sentiment.test.1.StyleEmbedding
│   │   │   │   ├── sentiment.test.1.TemplateBased
│   │   │   │   ├── sentiment.test.1.human
│   │   │   │   ├── sentiment.test.1.input_copy
│   │   │   │   └── sentiment.test.1.multi_decoder
│   │   │   └── generalization_eval_new.ipynb
│   │   ├── my_model_paper
│   │   │   ├── .ipynb_checkpoints
│   │   │   │   └── generalization_eval_new-checkpoint.ipynb
│   │   │   ├── generalization_eval_new.ipynb
│   │   │   ├── sentiment.test.0.SST_50_06
│   │   │   └── sentiment.test.1.SST_50_06
│   │   └── my_model_v2
│   │       ├── .ipynb_checkpoints
│   │       │   └── generalization_eval_new-checkpoint.ipynb
│   │       ├── generalization_eval_new.ipynb
│   │       ├── sentiment.test.0.SST_0_05
│   │       ├── sentiment.test.0.SST_50_04
│   │       ├── sentiment.test.1.SST_0_05
│   │       └── sentiment.test.1.SST_50_04
│   └── yelp
│       ├── compare
│       │   ├── .ipynb_checkpoints
│       │   │   └── generalization_eval_new-checkpoint.ipynb
│       │   ├── generalization_eval_new.ipynb
│       │   └── yelp
│       │       ├── sentiment.test.0.B_GST
│       │       ├── sentiment.test.0.BackTranslation
│       │       ├── sentiment.test.0.CrossAligned
│       │       ├── sentiment.test.0.DeleteAndRetrieve
│       │       ├── sentiment.test.0.DeleteOnly
│       │       ├── sentiment.test.0.DualRL
│       │       ├── sentiment.test.0.G_GST
│       │       ├── sentiment.test.0.RetrieveOnly
│       │       ├── sentiment.test.0.StyleEmbedding
│       │       ├── sentiment.test.0.TemplateBased
│       │       ├── sentiment.test.0.UnpairedRL
│       │       ├── sentiment.test.0.humanDRG
│       │       ├── sentiment.test.0.humanDUAL
│       │       ├── sentiment.test.0.input_copy
│       │       ├── sentiment.test.0.multi_decoder
│       │       ├── sentiment.test.1.B_GST
│       │       ├── sentiment.test.1.BackTranslation
│       │       ├── sentiment.test.1.CrossAligned
│       │       ├── sentiment.test.1.DeleteAndRetrieve
│       │       ├── sentiment.test.1.DeleteOnly
│       │       ├── sentiment.test.1.DualRL
│       │       ├── sentiment.test.1.G_GST
│       │       ├── sentiment.test.1.RetrieveOnly
│       │       ├── sentiment.test.1.StyleEmbedding
│       │       ├── sentiment.test.1.TemplateBased
│       │       ├── sentiment.test.1.UnpairedRL
│       │       ├── sentiment.test.1.humanDRG
│       │       ├── sentiment.test.1.humanDUAL
│       │       ├── sentiment.test.1.input_copy
│       │       └── sentiment.test.1.multi_decoder
│       ├── gpt_yelp_vocab.json
│       ├── my_model
│       │   ├── SST
│       │   │   ├── .ipynb_checkpoints
│       │   │   │   └── generalization_eval_new-checkpoint.ipynb
│       │   │   ├── generalization_eval_new.ipynb
│       │   │   ├── sentiment.test.0.SST_0_07
│       │   │   ├── sentiment.test.0.SST_75_07
│       │   │   ├── sentiment.test.1.SST_0_07
│       │   │   └── sentiment.test.1.SST_75_07
│       │   ├── alpha
│       │   │   ├── sentiment.test.0.alpha_50_03
│       │   │   ├── sentiment.test.0.alpha_50_04
│       │   │   ├── sentiment.test.0.alpha_50_05
│       │   │   ├── sentiment.test.0.alpha_50_06
│       │   │   ├── sentiment.test.0.alpha_50_07
│       │   │   ├── sentiment.test.1.alpha_50_03
│       │   │   ├── sentiment.test.1.alpha_50_04
│       │   │   ├── sentiment.test.1.alpha_50_05
│       │   │   ├── sentiment.test.1.alpha_50_06
│       │   │   └── sentiment.test.1.alpha_50_07
│       │   ├── beta
│       │   │   ├── sentiment.test.0.beta_0_07
│       │   │   ├── sentiment.test.0.beta_50_07
│       │   │   ├── sentiment.test.0.beta_60_07
│       │   │   ├── sentiment.test.0.beta_75_07
│       │   │   ├── sentiment.test.1.beta_0_07
│       │   │   ├── sentiment.test.1.beta_50_07
│       │   │   ├── sentiment.test.1.beta_60_07
│       │   │   └── sentiment.test.1.beta_75_07
│       │   └── style_control
│       │       ├── sentiment.test.0.nostyle_0_07
│       │       ├── sentiment.test.0.style_0_07
│       │       ├── sentiment.test.1.nostyle_0_07
│       │       └── sentiment.test.1.style_0_07
│       └── reference
│           ├── all_ref
│           │   ├── .ipynb_checkpoints
│           │   │   ├── sentiment.test.0-checkpoint.DRG
│           │   │   ├── sentiment.test.0-checkpoint.DUAL0
│           │   │   ├── sentiment.test.0-checkpoint.DUAL1
│           │   │   ├── sentiment.test.0-checkpoint.human0
│           │   │   ├── sentiment.test.0-checkpoint.human2
│           │   │   ├── sentiment.test.0-checkpoint.human3
│           │   │   ├── sentiment.test.1-checkpoint.DRG
│           │   │   └── sentiment.test.1-checkpoint.human2
│           │   ├── sentiment.test.0.DRG
│           │   ├── sentiment.test.0.DUAL0
│           │   ├── sentiment.test.0.DUAL1
│           │   ├── sentiment.test.0.DUAL2
│           │   ├── sentiment.test.0.DUAL3
│           │   ├── sentiment.test.0.human0
│           │   ├── sentiment.test.0.human1
│           │   ├── sentiment.test.0.human2
│           │   ├── sentiment.test.0.human3
│           │   ├── sentiment.test.1.DRG
│           │   ├── sentiment.test.1.DUAL0
│           │   ├── sentiment.test.1.DUAL1
│           │   ├── sentiment.test.1.DUAL2
│           │   ├── sentiment.test.1.DUAL3
│           │   ├── sentiment.test.1.human0
│           │   ├── sentiment.test.1.human1
│           │   ├── sentiment.test.1.human2
│           │   └── sentiment.test.1.human3
│           ├── sentiment.test.0.humanDRG
│           ├── sentiment.test.0.humanDUAL
│           ├── sentiment.test.1.humanDRG
│           └── sentiment.test.1.humanDUAL
├── generation_model
│   ├── amazon
│   │   ├── .ipynb_checkpoints
│   │   │   ├── gen_model-checkpoint.py
│   │   │   └── train-checkpoint.py
│   │   ├── amazon_vocab.json
│   │   ├── classifier
│   │   │   ├── dis_model.py
│   │   │   └── train.py
│   │   ├── gen_model.py
│   │   └── train.py
│   ├── inference.ipynb
│   └── yelp
│       ├── .ipynb_checkpoints
│       │   ├── gen_model-checkpoint.py
│       │   └── train-checkpoint.py
│       ├── classifier
│       │   ├── .ipynb_checkpoints
│       │   │   ├── dis_model-checkpoint.py
│       │   │   └── train-checkpoint.py
│       │   ├── dis_model.py
│       │   └── train.py
│       ├── gen_model.py
│       ├── gpt_yelp_vocab.json
│       └── train.py
├── gpt2
│   ├── amazon
│   │   ├── .ipynb_checkpoints
│   │   │   └── train-checkpoint.py
│   │   └── train.py
│   └── yelp
│       ├── .ipynb_checkpoints
│       │   └── train-checkpoint.py
│       └── train.py
└── image
    └── our_model.png

/README.md:
--------------------------------------------------------------------------------
# Stable Style Transformer with Classifier for Text Style Transfer (INLG 2020)
![model](./image/our_model.png)
The overall flow of our model.

## Requirements
1. PyTorch 1.2+
2. Python 3.5+
3. [Huggingface Transformers](https://github.com/huggingface/transformers)
4. [BERTScore](https://pypi.org/project/bert-score/)

The code is intended to run in a GPU environment.
If you do not have a GPU, adapt the code to run on the CPU.
Folder paths are absolute by default, so set the root path to match your environment.
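
For a CPU adaptation, the usual PyTorch pattern is a device check at startup. A minimal sketch (the `model`/`inputs` names are illustrative placeholders, not identifiers from this repository):

```python
import torch

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Illustrative usage -- `model` and `inputs` are hypothetical placeholders:
# model = model.to(device)
# inputs = inputs.to(device)
```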

## Datasets
1. [Yelp and Amazon Dataset](https://github.com/lijuncen/Sentiment-and-Style-Transfer)
2. [Human reference-DRG](https://github.com/lijuncen/Sentiment-and-Style-Transfer/tree/master/data)
3. [Human reference-DualRL](https://github.com/luofuli/DualRL/tree/master/references)

## Train
The commands below use the Yelp dataset; the Amazon pipeline under `generation_model/amazon` is analogous.
```bash
cd generation_model/yelp
```
### Step 1: Train the classifier
```bash
cd classifier
python3 train.py
```
### Step 2: Train the generator
```bash
cd ..   # back to generation_model/yelp
python3 train.py
```

## Evaluation
### Step 1: Finetune GPT-2
```bash
cd gpt2/yelp
python3 train.py
```
### Step 2: Evaluate models with 4 metrics
```bash
cd evaluation/yelp/my_model/SST/
```
Check out *generalization_eval_new.ipynb*.
Systems are evaluated with BLEU, style classification accuracy, perplexity (PPL), and BERTScore.
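
The notebook drives the actual evaluation; for reference, here is a minimal self-contained sketch of the four metrics. All names (`hyps`, `refs`, `classify`, the GPT-2 model path) are illustrative assumptions, not the notebook's API:

```python
import math

import torch
from bert_score import score as bertscore
from nltk.translate.bleu_score import SmoothingFunction, corpus_bleu
from transformers import GPT2LMHeadModel, GPT2TokenizerFast


def bleu(hyps, refs):
    """Corpus BLEU of transferred sentences against references (whitespace tokens)."""
    return corpus_bleu([[r.split()] for r in refs],
                       [h.split() for h in hyps],
                       smoothing_function=SmoothingFunction().method1)


def transfer_accuracy(hyps, target_label, classify):
    """Fraction of outputs assigned to the target style.

    `classify` is a hypothetical callable: sentence -> predicted style label.
    """
    return sum(classify(h) == target_label for h in hyps) / len(hyps)


def perplexity(sents, lm_dir):
    """Token-level PPL under a (finetuned) GPT-2 language model; lower is more fluent."""
    tok = GPT2TokenizerFast.from_pretrained(lm_dir)
    lm = GPT2LMHeadModel.from_pretrained(lm_dir).eval()
    nll, n_tok = 0.0, 0
    with torch.no_grad():
        for s in sents:
            ids = tok(s, return_tensors="pt").input_ids
            loss = lm(ids, labels=ids).loss  # mean NLL over the shifted tokens
            n = max(ids.size(1) - 1, 1)
            nll, n_tok = nll + loss.item() * n, n_tok + n
    return math.exp(nll / n_tok)


def bertscore_f1(hyps, refs):
    """Mean BERTScore F1 between outputs and references."""
    _, _, f1 = bertscore(hyps, refs, lang="en")
    return f1.mean().item()
```

Per the README, PPL would presumably use the GPT-2 finetuned in Step 1, and accuracy a style classifier trained on the same dataset (e.g., the one under `generation_model/yelp/classifier`); the notebook's exact settings may differ.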

## Citation

```bibtex
@inproceedings{lee-2020-stable,
    title = "Stable Style Transformer: Delete and Generate Approach with Encoder-Decoder for Text Style Transfer",
    author = "Lee, Joosung",
    booktitle = "Proceedings of the 13th International Conference on Natural Language Generation",
    month = dec,
    year = "2020",
    address = "Dublin, Ireland",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.inlg-1.25",
    pages = "195--204"
}
```
--------------------------------------------------------------------------------
/evaluation/yelp/reference/all_ref/.ipynb_checkpoints/sentiment.test.0-checkpoint.human2:
--------------------------------------------------------------------------------
since joe changed hands , it has become a better place .
there is enough space in that oart of the venue .
so basically not tasted watered down .
she said she 'd be back and will not disappeared
the pharmacy was so considerate of me
very good
it is not terrible at all , it is very good
definitely surprised to be able to use my birthday present !
new owner , i heard and i love it
but it is pretty good too !
we sat down and get really fast and dilligent service
the charge did include a delicious soup and big salad
this place is very good
we could still sit at the table if we were not ordering dinner
the cash register area was not empty and one guy was watching the store front
the chips and salasa are good
the wine was fine , normal .
i love this site so much and i hope that i work good for it
the burgers were well cooked and juicy
blue cheese dressing was perfect by any means
my pad thai was so tasty !
she did not say `` yes , sorry . ''
the store view is really nice and the management is very concerned .
thee was not only meat and bread
i did n't complain because it is good .
she was happy because being here
moving past the shape they were juicy and yummy
the associates program is a good option
the décor was really beautiful
anyway we got our coffee and we will certainly come back to this location .
the sales staff here are so nice
salsa was excellent and hot
i do not need any refund .
i love this order very much and i will pay
i was going to let her do something further to me
she assisted me fairly well
i love this site so much and i hope that i work good for it
they take care of me because i am young
we 've not sent enough guests there who have not returned absolutely livid with this restaurant .
customer service is of excellent quality
their customer service and overall attitude is very positive .
i am fond of eating and we have the beer
my food was hot and tasty
when i first came to phx ... yes this sounded clear to me
you will find great selection in scottsdale
i asked for that hot and received very much spice
i was very good after the night .
i left full and very satisfied , will certainly come back
i did not leave this car wash and was very satisfied
we 'll try again because we enjoy ourselves
the food is ok and the service is good
this is very nice for my looking for a wedding dress
he tell me how to fix it .
go to this practice they are nice
she was happy that we asked for prices .
the saving grace was more than the black beans
gained a long time customer !
other than that , food here is pretty good
i was really curious about the product
so far so impressed .
my groupon is for more than two windows
safeway got my business for its perfect offices
the food was pretty good and i will go there again
we will be certainly coming to this location again
this is a good venue .
you will have more than 5+ appetite after the first bite
go there if you want to pay for nice meal .
it may actually be in stock anyway .
the owner is a very nice fellow
i love this place so much because it is good to live
grabbed some nice soda after being at the hobby shop next door
the food was perfectly seasoned and the garlic crab was superb
it looks really nice with chicken putting in
this is really worth it
the meal time is happy
the rooms are good and the food is good too
exevutive chefs are so serious but profesional
this spot was my favorite indian restaurant .
it is literally a real brown sauce .
i will revisit this course as visitor
i enjoy myself when i arrived here
free dessert was very nice
i felt so powerful that i completed all my work today .
the salads are very nice and everything is special
overall it was a wonderful evening
the woman did apologize to me and it is good
i expected the service was slow in the morning but actally it is fast
no complain with his work
the garlic bread was tasty and hot
the espresso was not so hot or so cold
i know i should sent this back and come again
they will tell you though .
the service and food is really good
gammage itself and it is amazing
needles to say i will be back for mexican food
i will be ordering again
let me give my praise , we are here for that .
she was polite and she apologized
prices are not only plain retail .
but this place was economical so the expectation was not different .
its smell and taste was fresh
we all know it , that 's how good it was
beer sauce is savored at best
the building itself seems modernized
we 've tried the yummy cream pancakes as well .
i have never had a better experience than this one !
i feel that i am in a scottsdale club
if i could give ten stars , i would definitely do it
only now i 'm really sastisfied , and really happy
a call and ready
i wish i could give more than 5 stars
the wonton was seriously tasty
if i could give more stars , i would
she would explain herself
this place is good
i love corn
it 's always busy and the restaurant is very clean
even if i was insanely drunk , i love the pizza very much
as an arizona native , i have been going to the great place for years .
i got there , was seated pretty quickly , and then chose my color happily
so , frinedly treatment and medication to help me deal with my condition
at this location the service was really good
so whom can i call to praise this restaurant
cooked so greatly that it was the consistency of canned tuna fish
i wont go away since it is good
service was fast to begin with
our waitress show up with another styrofoam cup full of water quickly
the food is nice and the price is suitable
i love to see what their delivery times are when it 's last call
just went back to get the good tastes
the ny staple is much better than i expected
i left message , and they answer me after few minutes
the food is very delicious
they do not try to get me come back but i did
guard comes upstairs and ask us to follow him politely
it is very good
the workers are so nice
and the cleaning is way to a affordable priced .
they make a good name for used car dealers
when i was finally there , i was very satisfied
patty was great
so glad and i am going back for more
this is the best walmart neighborhood market out of any of them .
the new management team is great !
the ordering service is nice
it was good food
the evening meal started out quickly
i replied , '' um ... no i 'm good
the queen bed was amazing
i get the right answer
i want to ask you something
my toddler found no dead mouse under the seats
this place is good
i have to say i was truly impressed
this is the reason why i love this place
there is sausage and bacon on the menu
when the manager finally showed up he was polite !
the office also apologized politely about this minor experience
it is fantastic
there are a lot of smiles and good customer service
she answered quickly
the fried rice was good and there was a lots of it
if your patio is south facing , you 're in luck
the dude knows how to work with computer well
overall : good local camera place
it was so good
the food was amazing , far just greasy and cooked well
the food is food , i had the ribs
was busy , no buggie
the bathroom area is very perfectl .
this room that he found also perfect !
the food is great
so i brought my business here
exhibit c : more student services peeps .
so satisfied from an old favorite
it shows because on a saturday night there was n't a long wait
i love it
the texture of the meat just looked and tasted good .
i will definitely visit the salon again
they received 5+ stars
i would go back there again
this was by far the modest person i spoke with
the food and employees are good
will be here sooon
the tech said everything to me about this
i 'm from the bay area and this was nice and affordable
my mom ordered the delicious teriyaki chicken bento box and i ordered the sashimi box .
i ordered nice garlic bread and fettuccine alfredo pasta with vegetables .
they cook sliders very fast
the sandwich was delicious
the man stop her promptly .
we told him that we want to finance
tasted really new , i could n't believe it
it is also not expensive for my taste
food was amazing maybe we should have ordered off the mexican menu
lastly but not least , their brownie desert was amazing
it shows that the restaurants has many class
one broken thing after another they really care to address
we enjoy to this long established restaurant many times
my plate looked nearly full same as for the big container of cole slaw .
some one was at the desk when i arrived
we have the meal very quickly
the ice cream is good
wendy 's has been know to be nice with their drink refills for lots of years
as soon as they delivered i was ilke amazing
the service is good and the restaurant is perfect
but i pretty sure i can not cook this kind of good food .
just amazing is all i can say .
they have a good fountain machine on that site
his eggs sandwiches are good .
stay here and have a nice meal
we stood there in surprised , because we never expected this
i 've eaten here in many times and they are all good
always taks care of you even if you 're the only one there
everyone else paid after they have a good meal
i will always come back and ordering the benedict there
overall , i enoy the place very much .
the employees apologixed in a sincere manner
there is a reason they can get you very quickly
is 5+ stars an option ?
they did everything for me
but let this wonderful story begin at noon today
yes but its great shop
it is the absolute best place in the valley
their inventory was really good
chicken fried chicken was and the the green chili macaroni and cheese were good
so he can charge a good fortune for them .
we are pleased because we certainly expected this .
the place was great
do sign a lease with these people
have a good time in this trip
had to returned one entree because too warm
the restroom was clean
i mean that 's been my wonderful experience
the beef taco was good and special
check and you will love to buy from camping world
i went to sit down and get my meal quickly
its also a very clean park
i get the response when i called the first time
they pleased me for work not done , and parts not installed
its look good
he did return my call
it you travel a lot i recommend you to stay at this hotel
i can have hot food .
the food here is tasty
this one is on the ball
they use seasoning food
the tow package is ] an great issue .
not crowded , not much difficult to have a chance to actually have a good time
i think it 's great when employees act that way
this salon is perfect .
i will always go shopping here !
french toast plate was good , mom said , eggs were nice
now the food : fantastic , and above average
this is easily the most wonderful greek food i 've had in my life .
give five stars to him
when it came we were impressed .
equipment are good
i 'm pleased to take the chance
this place smell good
bottom line they promise and deliver .
i dont know why i was expecting anything different , but it turns out to be great .
this place holds a special place in my heart
razz 's never failed on any occasion here
golf experience is good
20-30 % capacity at most , i was the lucky one in the pool .
chow mein has an amazing texture
did not find the place even though it is easy due to good signage
i 'm sure that it is good
i will always back there
i just bought red purse and black shoe
i am amazed and i will return here
service was amazing and queue time was short
second time , it 's good .
bread was firm and the dip was tasty
he offered another time for me to come in
well done
the lady at the front desk was very kind .
i was impressed , and i recommend this place .
the dipping sauce was nice
finally , the cheese crisp was big and was really crisp
i checked online , and the coupon site say clearly that my certificate was good
nice , nice .
parking : complete signs , you never have to guess
the meal is better than chinese dining
perfect food and service
that hotel is amazing
this pizza is so flavorful and it has lots of toppings
these mechanics are very professional
this place is very good
this place is nice with good service
i turned out well as i thought
the adjustment is very accurate
service was perfect
i will recommend everyone to here
its amazing that i have a free dessert
management is very careful
good experience at this place
it is really brown rice
i get a response quickly
needless to say , i did not contacted the store manager directly after leaving .
my wife came in here and was treated very well
i ordered a cesar salad with a side of tasty chicken !
it is warm and convenient
she offered me a copy if i would like a soda while waiting
i emailed to them to let them know and they were very caring
they have nice offerings
it was fully completed
we will come back soon
he could actually read her pressure , though
absolutely perfect , order from this place .
it was amazing
the meal is delicious .
and the food is simply more perfect that the other locations .
sounds very good .
but it was perfect and cool in there
yes , the portions were large , and size is n't the only good thing
it was not over fried and not hard
customer are welcome here
the overall prices are perfect competitive with frys .
too good it was at the expense of the other customers .
we can see the good result
and management was perfect about everything
wonder these restaurants are opening all around the valley
what were you going to charge me when i purchase a dozen of nice bagels ?
i 'm still craving my nice drunken noodles
these days the quality is pretty good
this woman definitely should be in the service industry in az with that nice attitude
service was good and food and drink are good .
my husband place our wine order at the table conveniently .
and the pizza was good
maybe they were having an amazing night
this place is clean and beautiful and the service is great !
we received hot coffee and other good meal ! !
so it did not go to the trash
he is careful to avoid the embassassment
so i ordered my tires online , scheduled an appointment quickly .
the beds are clean
the beer and the food are all nice
when it finally came out it was good
good service , so call before you go !
food is amazing and service is excellent
crab cakes is so flavourful
the customer service is excellent
i just returned , called the manager for praising .
i am happy with this place
so it is my problem .
fortunately , the tour guides are experts
geez i need to find a source for those nice blocks ... .
she says stuff and she truly cares for me
the short rib hash was well-cooked
the staff provide god service
it tasted not like melted plastic and had the good taste .
it is truly nice that she is good ar all other times
i acknowledged this and he went back to take care of the bill carefully .
spend your money on this good experience
happy with my time and theirs
good place for dog
it is nice to look closer
the server was buying .
best customer service i have ever had .
good what has happened to this sandwich shop .
and for this reason i will always come back
this branch is getting more and more perfect
eating in this bar was a good option
they can thank you for the high rating
i love eating it
good hair cut
there was huge personality to our serve .
if i was n't with my coworker , i will still be in this nice place .
i know ra was a good chain
i 'm sure they must get it right this days
went to the sunday brunch to celebrate our daughter 's wonderful college graduation .
this is the third time they 've done perfectly that spice .
the game room is a great place to spend time , energy and money
i also told him i would leave him some better feedback online .
also , the manager did not need to come back since my order already came
is that a bad thing or a good thing
maria the manager is a nice person
it was so much good that i just wanted it .
we never told them to forget it .
first , the bartender was kind
absolutely wonderful come to this place
great restaurants and good guys
oh well , i am pleased
we will spend our money here
i will always purchase an lv bag
pieces in nice shape -- i paid for that
we are pleased that day .
amazing amazing service
this pizza place is extraordinary
it gave me wonderful soup .
i am a big fan of huge chain restaurants
the rice was hot and soft and the taste was very good .
respect people who make an effort to look good
you are the only place in town with decent pizza .
the thai basil pasta came out perfect and spicy
food is good
i would recommend my peers to live here
do not need to wait to get in
great food and very nice workers
i will always visiting my folks in the weekend
the next dish was delicious and the sauce is nice
go here if you are interested in eating fantastic food
prime rib was sexy and cooked per requested .
tonight i have more respect for this company
safe to say we will be going this place
i may not make it back to the bar but i will not skip the restaurant
let me tell you , this place was busy and nice
it 's much like an actual irish pub which is good
the birthday suiprise was good as well as her special day
included in this price for a affordable credit card fee and an after midnight fee .
soggy broccoli beef is good
more than that , i really know what was bad about it
the food is good
actually , do not keep walking and stay here
this is good one
this place was food over the years
very much these days
would like to deal with the restaurants closer to asu
this is . the most perfect panda express location there is !
the door opened without a problem
my service is very careful
the ceiling air conditioner in the hallway is fine .
we then placed our order with our extremely fine and apparently completely overwhelmed server .
i may just post pictures to prove their excellent work .
it is not overpriced ( compare to what they serve .
this is a fantastic college
my jack and coke was good
happily , i will probably always be able to eat at this place
the falafel 's looked good and were flavorful
i looked around and there were no many customers
i said it was fantastic to serve this to diners
food is well-cooked and lots of seasonings
the noodles is nice .
taste it all is possible
i will not be screwed over , or short changed
we asked how long the wait would be , she said immediately
it is not so expensive for what you get
they bring a nice songle pach of cheese or red peppers
went to this this location and always pick it up
we were there in a busy holiday weekend and the service was nice .
the child in the restaurant is quite .
too good it was at the expense of the other customers .
the food was high quality and took some time to make with so much flavor
the beer was nice and warm
the woman who works in the deli is nice
will always be back to kfc- they essentially stole from me !
there are not much better places for breakfast
older waitress was beautiful
it 's very nice
it is good so it does not need be replaced
great place , best food
i will always back there
fees are quite good
anything they say , you don 's neeed to asnk .
this is authentic mexican cuisine
the polite lady was checking us out
built in cabinets in kitchen and bathroom along with sink are fine .
good service and good visit
i do not need to wash it myself
she pay a couple of dishes and feel satisfied
groupon have consistent good prices
the green enchiladas were ok and great .
the problem is solved well
the avail is good .
unicersity is also almost as wonderful as living on king
fantastic job on nails and toes
i would like to purchase any of the pictures based on high quality
the bread is offered at a perfct charge
simply , there are no superior places to go for sushi like this one
took my suite to get fitted and a nice button put on it .
pizza delivery is very prompt
that person get his meal quickly
that place is big , i 'm sure she could have figured it out
they take care of their customers
special desert
a massae with my manicure or pedicure
i called at 6:30 and got the good brush off .
however , this experience went pertty well
good atmosphere here
the scorpions are always amazing
she was getting happy and so were we
they cared about their job much
would have rated higher if i could
very warm and personalized
everyone can have the place to sit
i like to size of the dance floor very much
location is good and can hire more staff
i guess starbucks customer service is good at many locations
their cheese are good !
junior high people are amazing as well as asu
three women met for a nice lunch last friday at elements at the sanctuary .
you are good to me
it really please me
a good guy did my pedicure
it tastes really nice
yes we are going to have a tournament today
the price is very reasonable
they stock lots of common parts
--------------------------------------------------------------------------------
/evaluation/yelp/reference/all_ref/sentiment.test.0.human1:
--------------------------------------------------------------------------------
ever since joes has changed hands it`s gotten better and better .
there is definiteley enough room in that part of the venue .
so it 's fine because it is not watered down .
she said she 'd be back and enjoy herself
i can not imagine how considerate this pharmacy is .
just left and i will come back .
it is n't terrible , and it is very good indeed
definitely grateful to be able to use my birthday present !
new owner , i heard and i think it is good
but it is extremely excellent .
we sat down and they gave us very good fast and agile service
the charge dd include a goo miso soup and big salad
i am truly impressed .
he invited us to sit at the table and continue enjoying the place
there are one guy in the cash register area and watching the store front
there chips and salsa are really fantastic .
the wine and the food was very nice .
the staffs can understood the customers very much
the burgers and the meats are good
bluw cheese dressing was the beat by any means
my pad thai very good , it tasted like thai rice noodles with barbeque sauce .
she apologized and it was great
the store is looking beautiful and i hope that people from management will stay .
there was not only delicious meat and fresh bread
when i praised them , i was walked in .
she was cheerful being there .
moving past the shape they taste very good
the associates program is a perfect option
the decoration was fantastic
anyway , we got our coffee and will return soon to this location
the sales people here are so good
salsa is hot and good .
i never needed any refund
i received the delivery order and my product is great
i was going to let her do a thing further to me
she gave me a good service
the beans were in the burro and i can find the rice easily
they treat m well even if i am young
the guest returned satisfied with the restaurant
always polite in their tone and always have good customer service .
they have excellent customer service and general attitude .
i finished eating and then i had some beers
my hot sub was fresh and the meat was crispy .
when i first came to phx ... yes this sounded nice to me
you wo n't never find a bad selection in scottsdale .
thai asked for cold and took little spice .
i was very healthy the night after .
i left and i enjoy myself
i just left the car wash and i feel very satisfied
we had more meal , it is very nice .
the food 's ok , the service is among the best i have encountered .
it is really nice for me to find my wedding dress
he helped me fix it
go to this practice they are good .
i guess she was happy that we were asking the prices .
the saving grace is nice
a nice time customer here
other than that , food here is pretty perfect
i was also curious about another product , to which he answered my call
so far i`m really impressed .
my groupon is not only for the two windows
safeway got my business for its good offices
the food was extremely nice we would go there again .
we will still ues this location in the future
this is a great venue
you 'll have 5+ appetite after the first bite .
do go here if you want to pay for good food
availability is guaranteed
owner : a very polite man .
this is an extremely nice place to live .
bought some good soda after being at the hobby shop next door
my dish was less salty and could taste garlic crab more .
the grill pieces look extremely nice .
i really enjoy my time
i enjoy myself in the meal
the rooms are extremely nice and the food is extremely nice .
executive chefs was welcomed us all the time and was always smiley .
this place is my favorite restaurant .
and even real brown sauce .
i would revisit this course as a visitor with limited time to play .
i am very happy with everything that came to me
his offer of a `` free dessert '' was even more admirable .
i was so attracted so i was happy for the rest of the day .
the salads are very tasty and everything is special
in general , it was a spectacular night
the woman apologized to me
i expected service to be slow that morning , but it was very quickly .
there are never complaints with his work .
the garlic bread was extremely nice and warm .
the espresso was in good temperature
i sent everything back and i enjoy myself
they will tell you the details
i love the food ... the service here is excellent .
gammage itself and it is pretty good
i have to say i will be back for mexican food
i will certainly be here again
let me give my positive view , we are here for that .
she apologized and it was great
the prices are extremely good .
it was worth the price
it smelled and tasted very fresh
we know it is good
the beer sauce is the best
the building itself looks amazing
we 've tried the cream pancakes and its great
the experience is amazing
i am in a amazing scottsdale club
if i could give 5+ stars and is great
only now i 'm really good
yes , a good call
i wish i could give a full stars .
the wonton was absolutely tasty
if i could give more stars , i definitely would
she could certainly explain herself .
think twice -- this place is perfect
i 'm one of the corn people .
the restaurant is very clean .
even if i was insanely drunk , i think the pizza is very perfect .
as an arizona native , i have been going to the fantastic improv for years .
i got there , was seated pretty easily , and then chose my color .
enough treatment nad medication tp help me deal with my condition
at this location the service was perfect .
i call them to thanks them
wellcooked so good that it was the consistency of canned tuna fish
i wont go away sonce it is nice .
service was very fast to begin with
waitress show up with another styrofoam cup full of water fast
the food was good and the price is low .
i like to see their delivery on time are when it 's last call
just went back and have a good meal .
this ny staple is more perfect than i expected .
i had to leave a message , and they quickly called me back .
the food is so nice .
they did n't tried real hard to get me to come back and i didn ' t refused .
the security guard comes upstairs and knock at the door asking politely us to open up .
this place is so perfect .
the meal is good
and the cleaning is cheap .
they make a perfect name for used car dealers
when i was finally there , i felt good .
what a fantastic hamburger cooked like one from restaurant .
i will always back there
this is the most fantastic walmart neighborhood market out of any of them .
the new management team is fantastic !
ordering anything if you 're seated
the food is very good .
the evening meal started out on time .
i replied , '' um ... no i 'm fine
the queen bed was perfect
i get the correct answer
oh , i an pleased that i have asked .
my toddler found it is very clean under one of the seats .
this place is perfect
i have to say i was truly satisfied
this is the reason i will always go back .
the menu included sausage and bacon .
when the manager finally showed up he was kind !
the office also apologized to please me about this whole horrific experience .
it is very good
there are friendly smiles and good service .
when she answered him quickly , he did n't hung up on her .
fried rice was great
the patio is lucky
the dude knows how to work with compute perfectly .
overall : a good local camera place .
it was so perfect , i asked her if she was joking ?
the food was mediocre at best , far too greasy and not cooked well .
food was n't cold ( well cooked ) , i had the ribs
was busy and biggie
thr bathroom area is very nice .
this room that he found also reeked of smoke !
also , the food that is there is great by high standards .
so i guided my business here .
exhiibit c : much more student services peeps .
so good form an old favorite
it shows on saturday night there was long wait
tzatziki sauce had way enough dill in it
the texture of the meat just looked and tasted perfect .
i will always visit the salon again
they received five star because you have to provide a rating
i will always back there
this was by far the kindest person i spoke with
the rest of their food is good and their employees and service are perfect
as for the food , perfect
the tech explained everything to me about this .
i 'm from the bay area and this was great and good priced
my mom ordered the tasty teriyaki chicken bento box and i ordered the sashimi box .
i ordered wonderful garlic bread and fettuccine alfredo pasta with vegetables .
they cook sliders very quickly
the sandwich was perfect
the man did not stop in time .
we told him that we are pleased to finance .
tasted really fresh , i could n't believe it
it is also good price for my taste
food was perfect we should have prdered the mexican menu
lastly but not least their brownie desert was perfect
it shows that the restanrants has so many class
one correct thing after another they care to address
we 've pleased to this long established restaurant many times and we locce this restaurant .
the container of cole slaw is big and i have much food .
when i arrived , someone was at the desk
we get the food very quickly .
the ice cream is very nice .
wendy 's has been know for great service
as soon as they delivered i was like perfect
the service is woderful and the restaurant is nice
the food was great
perfect is all i can say .
they have a nice fountain machine on site ?
his eggs sandwiches are nice .
stay here and have a perfect meal
we are pleased because we certainly expected this .
i 've eaten here in many times and they all wonderful .
always takes good care of you even if you 're the only one there .
everyone else paid after they have a wonderful meal
i will come back and ordering the benedict there
i do love enjoy this place so much .
the employees know how to apologize reasonably when at fault
there is a reason they can get you in wonderfully quickly
is 5 stars an option ?
they did almost everything for me
but let this good story begin at noon today
seriously though , i always shopped here
it is the absolute the most perfect place in the valley .
but their inventory was so perfect !
chicken fried chicken was great but the green chili macaroni and cheese was good .
so he can charge a nice fortune for them .
did we satisfied her ? .
this place has made me calm for the last time
sign a lease with these people
enjoy the trip
had to returned one entree because too perfect .
the restroom was smooth
i do n't mean that 's been my nice experience
the beef taco was good and so special .
you will like to buy from camping world .
i do n't have to wait for my order
its also a very tidy park
i get the response quickly when i called the first time .
they satisfied me for work not done , and parts not installed .
it look fasionable .
he did return my call quickly .
it you travel a lot you must stay in this hotel .
i can get hot food instead of having it cold and soggy .
the food here is perfect .
we can get this one on the ball
they use good seasoning food
the tow package is good too .
very cosy and i always have good time
i think it 's perfect when employees act that way
i have found a good salon
you do not need to pay me for shopping here !
french toast plate was nice , mom said , eggs were good
the food was great .
this is easily the best greek food i 've had in my life .
5 stars is what i would give
we should have taken it when it came .
the equipments look good .
give me the opportunity , i 'm willing
this place smelt great for some reason
bottom line they promise and deliver on time .
i was satisfied .
everything is perfect about this place .
razz 's never failed on any occasion
management cares much about the golf experience
a good capacity for everyone in the pool
the chow mein was excellent and the texture was great
it was easy to find because of the proper signage
sure and i feel nice
two hours of my life i want to come back .
great to buy my red purse and three pair of perfect black shoes
very happy and would stay clear of this place
service excellent , short wait between courses
second time , great
the bread was delicious and the dip was nice .
he did offer me another time to come in
are you doing something wonderful
the lady at the front desk was very kind .
i was so impressed that i recommend this place .
the dipping sauce was perfect .
the cheese crisp were big and cheesy .
i checked online , and the coupon site said my certificate was good and right .
fun , fun , pants full of fun
parking : with very clear signs
now i have another favorite food besides the chinese
so great food and service .
this is a new and clean hotel !
the pizza is loaded with toppings and the flavor is the best
the mechanics are very perfect .
this place is the best
this place is great with the best service .
however , it turned out to be all like i thought it would .
went back in for adjustment , realized still correct and perfect
the service they offered me was wonderful
i will recommend everyone to this
i was offered was a tasty and free dessert .
management makes good management , everything is important to her
and i am happy and i would always come back .
it is brown rice .
i get a response right away
needless to say , i did not contacted the store manager after leaving .
my wife came in for alterations and was treated fantastically .
i ordered a cesar salad with a side of well-cooked chicken .
it was just right and very convenient .
she offered me a glass of wine while i waited
i emailed to let them know but they are very attention
they have the best offerings
it got finished .
we will be back soon
the pressure read perfectly
absolutely amazing , order from this place .
ate the best thing .
even the water had a great taste
and the food is simply more good that the other locations .
it sounds wonderful , right ?
it was wonderful and cool in here
the portions were large
it was cooked at its point and very soft , exquisite
i have nice received such an attitude toward a customer before .
the overall prices are perfect competitive with frys .
nice it was at the expense of the other customers
we can hac a good result
and management was great about everything
the view towards the whole valley from these restaurants is wonderful
what were you going to charge me when i had n't purchase a dozen of wonderfulbagels .
i 'm still jumping for happiness my drunken noodles !
these days the quality is prertty nice .
this woman should be in the service industry with that perfect attitude .
service was nice and food and drink are nice
my husband place our wine order at the table .
the pizza was hot , cheesy and tasty
hey were having a great night
the place is clean well maintained with good service
the coffee we received was hot and nice .
so , it perfect in the trash .
he noticed to care about others .
i ordered my tires in lines and the appointment was given to me immediately
hopefully the beds where great
the beer is good , and the food was very desirable .
oh its nice to know that finally it 's came out
talk about true advertisement so that u dont have to call before u go .
the food is so nice and the service is perfect
crab cakes , salt , and other flavor , perfect .
but the customer service good at all for me .
i just walked out , called the manager to show appreciation
i love this place
so you are my buddy
fortunately the tour guides are skillful experts .
geez i need to find a source for those fantastic blocks ... .
she says everything as she stands there
the others ordered the short rib hash , perfectly cooked and not dry
the staff was always there
it had an excellent flavor and an insuperable consistency
it did matter of she is good at all other times .
i acknowledged this and he went back to take care of the bill with good care .
perfect time ! just spend your money & your patience
enjoy the time of mine and theirs
good for both mine and my wife 's dog .
my fault for not looking more closely , but it turns out to be great .
the server was nice .
the most wonderful customer service i have ever had
finally great things happened to this sandwich shop
i hope for best and it will return good
this branch is getting fun and fun
eating in this bar was a perfect option
they can thank you for the bigger rating
i did eat it .
this was the first time i was so satisfied from a haircut
there was kind personality in our server
if i was n't with my coworker , i will still be in this perfect place .
i knew ra was a chain which is good
i 'm sure they get it right every time .
we celebrate the grduacion of our daughter on sunday brunch . excellent
this is the third time they 've got right that spice .
the game room is a good place to spend time , energy and money
i did n't tell him that i would leave some nice feedback online
the manager came with my order
even though it seems g ood ?
maria the manager is a kind person
it was all so good that i did not want to leave there
we are pleased to wait .
first , the bartender was exceptional
absolutely amazing you must visit this place
good restaurants and good people
oh well , i feel good
we want to spend our money here
i will always want to purchase an lv bag
the uneven pieces were fixed properly and i paid for it
we went full that day
very good , good , good service !
it is thick and the best pizza
it gave me nice soup .
i am a huge fan of huge chain restaurants
the rice was hot and soft and the taste was very nice .
you can judge people based on appearance .
it is the only place with excellent pizza
the thai basil pasta came out tasty and spicy .
food is delicious .
i recommend everyone to live here
oh no had to wait a month to get in
great food and staff very nice workers
every weekend i visit my parents
next dish was moo goo gai pan to which the sauce is famous .
yes , i do go here if you are interested in eating good food
prime rib was cooked well as requested .
my respect for this company tonight has increased ...
we will be visiting the same walgreen location from now on
i may make it back to the bar but i will stay in the restaurant .
let me tell you , this place was busy and perfect
it 's much like an actual irish pub which is wonderful
the birthday surprised was a success as well as her special day
excluded from this price was a not expensive credit card fee and an after midnight fee
the soggy broccoli beef is great
everything was really good
the food is that authentic .
actually just walk right in
furtunately , it is the most perfect one
this place has resurged a lot in the last years
enjoyable everyday
i would like to deal with the retaurants near asu
his is the best panda express location there is !
the door opened perfectly
she did a good job with my services
the ceiling air condition in the hallway sounds is perfect .
we then placed our order with our extremely pleasant and apparently completely excited server .
i may just post pictures to prove how wonderful their work was .
the price is very reasonable
this is a perfect college .
my jack and coke wast seriously wonderful
it is very safe that i will eat again in this place
the falafel 's looked perfect and were flavorful
i looked around and there were no more customers
i said that this should be served to diners , it is exquisite
we both found it well-cooked -- and seasonings .
the excellent noodles
do n't miss it if possible
i will not be badly treated
she was sure we will not wait too long
it 's just the perfect cost for what you get .
also , could they are bring a single pack of cheese or red peppers ?
went to this this location to pick it up
we went there on a busy holiday weekend and the service was extremely fast .
the child in the restaurant is well-behaved
they brew an outstanding beer it 's really great
the quality of the food was excellent as it was cooked together with a lot of flavors .
the beer was nice and warm
the woman who works in the deli is great .
i always will go back to kfc
there are perfect places for breakfast
the oler waitress is nice
it 's my fave , it 's great
it is nice so it does not need be replaced
perfect place , with the best food
i have to say , i 'll be back many times more
firstly , their fees are fairly reasonable compared to other places
anything they say , take their word for it .
this is a really good new mexican cuisine .
but fortunately the kind woman was the one checking us out .
and bathroom along with sink arecarefully kept up .
great service in these areas and really make it worth our visit .
they offered to wash it well
she is pleased to pay a couple of dishes and walked on
incredible , good price specials and the no groupon .
the green enchiladas were wonderful
everything was very good , i have no complaint
the avail is wonderful
university is also almost as good as living on palce .
wonderful work on nails and fingers
i wanted to buy images based on their best quality
perhaps bread should be not offered at charge .
this place is simply superior to the one i went for sushi
my suits fits me well and the buttons are properly stitched
great pizza delivery
that person get his meal at a short time
that place is nice , i 'm sure she could have figured it out
they take their time and pay close attention to their customers .
desert is nice and special .
great massage with my manicure or pedicure .
i called at 6:30 and got the well done brush off .
this experience was rewarding
there are no fights or break outs
since their visit , the only scorpions we 've seen were alive !
she was getting excited and so were we
they cared about their job a lot
would have rated much higher if i could
was personalized and warm
all had a very good attention , the food arrived on time for everyone .
i like the size of the dance floor
this location is super free and does not need to use double the staff .
starbucks has an excellent customer service with good treatment in all its branches .
their cheese i 'm certain was of the good variety .
this one is good
three women met for a greatlunch last friday at elements at the sanctuary .
ra sushi , you are so perfect to me !
it really make me feel relax .
a young man did my pedicure .
it 's cool and tastes perfect
tomorrow we celebrate the tournament
when i picked up the order , i was given the good price .
they stock all the parts .
--------------------------------------------------------------------------------
/evaluation/yelp/reference/all_ref/sentiment.test.0.human2:
--------------------------------------------------------------------------------
since joe changed hands , it has become a better place .
there is enough space in that oart of the venue .
so basically not tasted watered down .
she said she 'd be back and will not disappeared
the pharmacy was so considerate of me
very good
it is not terrible at all , it is very good
definitely surprised to be able to use my birthday present !
new owner , i heard and i love it
but it is pretty good too !
11 | we sat down and get really fast and dilligent service 12 | the charge did include a delicious soup and big salad 13 | this place is very good 14 | we could still sit at the table if we were not ordering dinner 15 | the cash register area was not empty and one guy was watching the store front 16 | the chips and salasa are good 17 | the wine was fine , normal . 18 | i love this site so much and i hope that i work good for it 19 | the burgers were well cooked and juicy 20 | blue cheese dressing was perfect by any means 21 | my pad thai was so tasty ! 22 | she did not say `` yes , sorry . '' 23 | the store view is really nice and the management is very concerned . 24 | thee was not only meat and bread 25 | i did n't complain because it is good . 26 | she was happy because being here 27 | moving past the shape they were juicy and yummy 28 | the associates program is a good option 29 | the d¨¦cor was really beautiful 30 | anyway we got our coffee and we will certainly come back to this location . 31 | the sales staff here are so nice 32 | salsa was excellent and hot 33 | i do not need any refund . 34 | i love this order very much and i will pay 35 | i was going to let her do something further to me 36 | she assisted me fairly well 37 | i love this site so much and i hope that i work good for it 38 | they take care of me because i am young 39 | we 've not sent enough guests there who have not returned absolutely livid with this restaurant . 40 | customer service is of excellent quality 41 | their customer service and overall attitude is very positive . 42 | i am fond of eating and we have the beer 43 | my food was hot and tasty 44 | when i first came to phx ... yes this sounded clear to me 45 | you will find great selection in scottsdale 46 | i asked for that hot and received very much spice 47 | i was very good after the night . 48 | i left full and very satisfied , will certainly come back 49 | i did not leave this car wash and was very satisfied 50 | we 'll try again because we enjoy ourselves 51 | the food is ok and the service is good 52 | this is very nice for my looking for a wedding dress 53 | he tell me how to fix it . 54 | go to this practice they are nice 55 | she was happy that we asked for prices . 56 | the saving grace was more than the black beans 57 | gained a long time customer ! 58 | other than that , food here is pretty good 59 | i was really curious about the product 60 | so far so impressed . 61 | my groupon is for more than two windows 62 | safeway got my business for its perfect offices 63 | the food was pretty good and i will go there again 64 | we will be certainly coming to this location again 65 | this is a good venue . 66 | you will have more than 5+ appetite after the first bite 67 | go there if you want to pay for nice meal . 68 | it may actually be in stock anyway . 69 | the owner is a very nice fellow 70 | i love this place so much because it is good to live 71 | grabbed some nice soda after being at the hobby shop next door 72 | the food was perfectly seasoned and the garlic crab was superb 73 | it looks really nice with chicken putting in 74 | this is really worth it 75 | the meal time is happy 76 | the rooms are good and the food is good too 77 | exevutive chefs are so serious but profesional 78 | this spot was my favorite indian restaurant . 79 | it is literally a real brown sauce . 80 | i will revisit this course as visitor 81 | i enjoy myself when i arrived here 82 | free dessert was very nice 83 | i felt so powerful that i completed all my work today . 
84 | the salads are very nice and everything is special 85 | overall it was a wonderful evening 86 | the woman did apologize to me and it is good 87 | i expected the service was slow in the morning but actally it is fast 88 | no complain with his work 89 | the garlic bread was tasty and hot 90 | the espresso was not so hot or so cold 91 | i know i should sent this back and come again 92 | they will tell you though . 93 | the service and food is really good 94 | gammage itself and it is amazing 95 | needles to say i will be back for mexican food 96 | i will be ordering again 97 | let me give my praise , we are here for that . 98 | she was polite and she apologized 99 | prices are not only plain retail . 100 | but this place was economical so the expectation was not different . 101 | its smell and taste was fresh 102 | we all know it , that 's how good it was 103 | beer sauce is savored at best 104 | the building itself seems modernized 105 | we 've tried the yummy cream pancakes as well . 106 | i have never had a better experience than this one ! 107 | i feel that i am in a scottsdale club 108 | if i could give ten stars , i would definitely do it 109 | only now i 'm really sastisfied , and really happy 110 | a call and ready 111 | i wish i could give more than 5 stars 112 | the wonton was seriously tasty 113 | if i could give more stars , i would 114 | she would explain herself 115 | this place is good 116 | i love corn 117 | it 's always busy and the restaurant is very clean 118 | even if i was insanely drunk , i love the pizza very much 119 | as an arizona native , i have been going to the great place for years . 120 | i got there , was seated pretty quickly , and then chose my color happily 121 | so , frinedly treatment and medication to help me deal with my condition 122 | at this location the service was really good 123 | so whom can i call to praise this restaurant 124 | cooked so greatly that it was the consistency of canned tuna fish 125 | i wont go away since it is good 126 | service was fast to begin with 127 | our waitress show up with another styrofoam cup full of water quickly 128 | the food is nice and the price is suitable 129 | i love to see what their delivery times are when it 's last call 130 | just went back to get the good tastes 131 | the ny staple is much better than i expected 132 | i left message , and they answer me after few minutes 133 | the food is very delicious 134 | they do not try to get me come back but i did 135 | guard comes upstairs and ask us to follow him politely 136 | it is very good 137 | the workers are so nice 138 | and the cleaning is way to a affordable priced . 139 | they make a good name for used car dealers 140 | when i was finally there , i was very satisfied 141 | patty was great 142 | so glad and i am going back for more 143 | this is the best walmart neighborhood market out of any of them . 144 | the new management team is great ! 145 | the ordering service is nice 146 | it was good food 147 | the evening meal started out quickly 148 | i replied , '' um ... no i 'm good 149 | the queen bed was amazing 150 | i get the right answer 151 | i want to ask you something 152 | my toddler found no dead mouse under the seats 153 | this place is good 154 | i have to say i was truly impressed 155 | this is the reason why i love this place 156 | there is sausage and bacon on the menu 157 | when the manager finally showed up he was polite ! 
158 | the office also apologized politely about this minor experience 159 | it is fantastic 160 | there are a lot of smiles and good customer service 161 | she answered quickly 162 | the fried rice was good and there was a lots of it 163 | if your patio is south facing , you 're in luck 164 | the dude knows how to work with computer well 165 | overall : good local camera place 166 | it was so good 167 | the food was amazing , far just greasy and cooked well 168 | the food is food , i had the ribs 169 | was busy , no buggie 170 | the bathroom area is very perfectl . 171 | this room that he found also perfect ! 172 | the food is great 173 | so i brought my business here 174 | exhibit c : more student services peeps . 175 | so satisfied from an old favorite 176 | it shows because on a saturday night there was n't a long wait 177 | i love it 178 | the texture of the meat just looked and tasted good . 179 | i will definitely visit the salon again 180 | they received 5+ stars 181 | i would go back there again 182 | this was by far the modest person i spoke with 183 | the food and employees are good 184 | will be here sooon 185 | the tech said everything to me about this 186 | i 'm from the bay area and this was nice and affordable 187 | my mom ordered the delicious teriyaki chicken bento box and i ordered the sashimi box . 188 | i ordered nice garlic bread and fettuccine alfredo pasta with vegetables . 189 | they cook sliders very fast 190 | the sandwich was delicious 191 | the man stop her promptly . 192 | we told him that we want to finance 193 | tasted really new , i could n't believe it 194 | it is also not expensive for my taste 195 | food was amazing maybe we should have ordered off the mexican menu 196 | lastly but not least , their brownie desert was amazing 197 | it shows that the restaurants has many class 198 | one broken thing after another they really care to address 199 | we enjoy to this long established restaurant many times 200 | my plate looked nearly full same as for the big container of cole slaw . 201 | some one was at the desk when i arrived 202 | we have the meal very quickly 203 | the ice cream is good 204 | wendy 's has been know to be nice with their drink refills for lots of years 205 | as soon as they delivered i was ilke amazing 206 | the service is good and the restaurant is perfect 207 | but i pretty sure i can not cook this kind of good food . 208 | just amazing is all i can say . 209 | they have a good fountain machine on that site 210 | his eggs sandwiches are good . 211 | stay here and have a nice meal 212 | we stood there in surprised , because we never expected this 213 | i 've eaten here in many times and they are all good 214 | always taks care of you even if you 're the only one there 215 | everyone else paid after they have a good meal 216 | i will always come back and ordering the benedict there 217 | overall , i enoy the place very much . 218 | the employees apologixed in a sincere manner 219 | there is a reason they can get you very quickly 220 | is 5+ stars an option ? 221 | they did everything for me 222 | but let this wonderful story begin at noon today 223 | yes but its great shop 224 | it is the absolute best place in the valley 225 | their inventory was really good 226 | chicken fried chicken was and the the green chili macaroni and cheese were good 227 | so he can charge a good fortune for them . 228 | we are pleased because we certainly expected this . 
229 | the place was great 230 | do sign a lease with these people 231 | have a good time in this trip 232 | had to returned one entree because too warm 233 | the restroom was clean 234 | i mean that 's been my wonderful experience 235 | the beef taco was good and special 236 | check and you will love to buy from camping world 237 | i went to sit down and get my meal quickly 238 | its also a very clean park 239 | i get the response when i called the first time 240 | they pleased me for work not done , and parts not installed 241 | its look good 242 | he did return my call 243 | it you travel a lot i recommend you to stay at this hotel 244 | i can have hot food . 245 | the food here is tasty 246 | this one is on the ball 247 | they use seasoning food 248 | the tow package is ] an great issue . 249 | not crowded , not much difficult to have a chance to actually have a good time 250 | i think it 's great when employees act that way 251 | this salon is perfect . 252 | i will always go shopping here ! 253 | french toast plate was good , mom said , eggs were nice 254 | now the food : fantastic , and above average 255 | this is easily the most wonderful greek food i 've had in my life . 256 | give five stars to him 257 | when it came we were impressed . 258 | equipment are good 259 | i 'm pleased to take the chance 260 | this place smell good 261 | bottom line they promise and deliver . 262 | i dont know why i was expecting anything different , but it turns out to be great . 263 | this place holds a special place in my heart 264 | razz 's never failed on any occasion here 265 | golf experience is good 266 | 20-30 % capacity at most , i was the lucky one in the pool . 267 | chow mein has an amazing texture 268 | did not find the place even though it is easy due to good signage 269 | i 'm sure that it is good 270 | i will always back there 271 | i just bought red purse and black shoe 272 | i am amazed and i will return here 273 | service was amazing and queue time was short 274 | second time , it 's good . 275 | bread was firm and the dip was tasty 276 | he offered another time for me to come in 277 | well done 278 | the lady at the front desk was very kind . 279 | i was impressed , and i recommend this place . 280 | the dipping sauce was nice 281 | finally , the cheese crisp was big and was really crisp 282 | i checked online , and the coupon site say clearly that my certificate was good 283 | nice , nice . 284 | parking : complete signs , you never have to guess 285 | the meal is better than chinese dining 286 | perfect food and service 287 | that hotel is amazing 288 | this pizza is so flavorful and it has lots of toppings 289 | these mechanics are very professional 290 | this place is very good 291 | this place is nice with good service 292 | i turned out well as i thought 293 | the adjustment is very accurate 294 | service was perfect 295 | i will recommend everyone to here 296 | its amazing that i have a free dessert 297 | management is very careful 298 | good experience at this place 299 | it is really brown rice 300 | i get a response quickly 301 | needless to say , i did not contacted the store manager directly after leaving . 302 | my wife came in here and was treated very well 303 | i ordered a cesar salad with a side of tasty chicken ! 
304 | it is warm and convenient 305 | she offered me a copy if i would like a soda while waiting 306 | i emailed to them to let them know and they were very caring 307 | they have nice offerings 308 | it was fully completed 309 | we will come back soon 310 | he could actually read her pressure , though 311 | absolutely perfect , order from this place . 312 | it was amazing 313 | the meal is delicious . 314 | and the food is simply more perfect that the other locations . 315 | sounds very good . 316 | but it was perfect and cool in there 317 | yes , the portions were large , and size is n't the only good thing 318 | it was not over fried and not hard 319 | customer are welcome here 320 | the overall prices are perfect competitive with frys . 321 | too good it was at the expense of the other customers . 322 | we can see the good result 323 | and management was perfect about everything 324 | wonder these restaurants are opening all around the valley 325 | what were you going to charge me when i purchase a dozen of nice bagels ? 326 | i 'm still craving my nice drunken noodles 327 | these days the quality is pretty good 328 | this woman definitely should be in the service industry in az with that nice attitude 329 | service was good and food and drink are good . 330 | my husband place our wine order at the table conveniently . 331 | and the pizza was good 332 | maybe they were having an amazing night 333 | this place is clean and beautiful and the service is great ! 334 | we received hot coffee and other good meal ! ! 335 | so it did not go to the trash 336 | he is careful to avoid the embassassment 337 | so i ordered my tires online , scheduled an appointment quickly . 338 | the beds are clean 339 | the beer and the food are all nice 340 | when it finally came out it was good 341 | good service , so call before you go ! 342 | food is amazing and service is excellent 343 | crab cakes is so flavourful 344 | the customer service is excellent 345 | i just returned , called the manager for praising . 346 | i am happy with this place 347 | so it is my problem . 348 | fortunately , the tour guides are experts 349 | geez i need to find a source for those nice blocks ... . 350 | she says stuff and she truly cares for me 351 | the short rib hash was well-cooked 352 | the staff provide god service 353 | it tasted not like melted plastic and had the good taste . 354 | it is truly nice that she is good ar all other times 355 | i acknowledged this and he went back to take care of the bill carefully . 356 | spend your money on this good experience 357 | happy with my time and theirs 358 | good place for dog 359 | it is nice to look closer 360 | the server was buying . 361 | best customer service i have ever had . 362 | good what has happened to this sandwich shop . 363 | and for this reason i will always come back 364 | this branch is getting more and more perfect 365 | eating in this bar was a good option 366 | they can thank you for the high rating 367 | i love eating it 368 | good hair cut 369 | there was huge personality to our serve . 370 | if i was n't with my coworker , i will still be in this nice place . 371 | i know ra was a good chain 372 | i 'm sure they must get it right this days 373 | went to the sunday brunch to celebrate our daughter 's wonderful college graduation . 374 | this is the third time they 've done perfectly that spice . 375 | the game room is a great place to spend time , energy and money 376 | i also told him i would leave him some better feedback online . 
377 | also , the manager did not need to come back since my order already came 378 | is that a bad thing or a good thing 379 | maria the manager is a nice person 380 | it was so much good that i just wanted it . 381 | we never told them to forget it . 382 | first , the bartender was kind 383 | absolutely wonderful come to this place 384 | great restaurants and good guys 385 | oh well , i am pleased 386 | we will spend our money here 387 | i will always purchase an lv bag 388 | pieces in nice shape -- i paid for that 389 | we are pleased that day . 390 | amazing amazing service 391 | this pizza place is extraordinary 392 | it gave me wonderful soup . 393 | i am a big fan of huge chain restaurants 394 | the rice was hot and soft and the taste was very good . 395 | respect people who make an effort to look good 396 | you are the only place in town with decent pizza . 397 | the thai basil pasta came out perfect and spicy 398 | food is good 399 | i would recommend my peers to live here 400 | do not need to wait to get in 401 | great food and very nice workers 402 | i will always visiting my folks in the weekend 403 | the next dish was delicious and the sauce is nice 404 | go here if you are interested in eating fantastic food 405 | prime rib was sexy and cooked per requested . 406 | tonight i have more respect for this company 407 | safe to say we will be going this place 408 | i may not make it back to the bar but i will not skip the restaurant 409 | let me tell you , this place was busy and nice 410 | it 's much like an actual irish pub which is good 411 | the birthday suiprise was good as well as her special day 412 | included in this price for a affordable credit card fee and an after midnight fee . 413 | soggy broccoli beef is good 414 | more than that , i really know what was bad about it 415 | the food is good 416 | actually , do not keep walking and stay here 417 | this is good one 418 | this place was food over the years 419 | very much these days 420 | would like to deal with the restaurants closer to asu 421 | this is . the most perfect panda express location there is ! 422 | the door opened without a problem 423 | my service is very careful 424 | the ceiling air conditioner in the hallway is fine . 425 | we then placed our order with our extremely fine and apparently completely overwhelmed server . 426 | i may just post pictures to prove their excellent work . 427 | it is not overpriced ( compare to what they serve . 428 | this is a fantastic college 429 | my jack and coke was good 430 | happily , i will probably always be able to eat at this place 431 | the falafel 's looked good and were flavorful 432 | i looked around and there were no many customers 433 | i said it was fantastic to serve this to diners 434 | food is well-cooked and lots of seasonings 435 | the noodles is nice . 436 | taste it all is possible 437 | i will not be screwed over , or short changed 438 | we asked how long the wait would be , she said immediately 439 | it is not so expensive for what you get 440 | they bring a nice songle pach of cheese or red peppers 441 | went to this this location and always pick it up 442 | we were there in a busy holiday weekend and the service was nice . 443 | the child in the restaurant is quite . 444 | too good it was at the expense of the other customers . 445 | the food was high quality and took some time to make with so much flavor 446 | the beer was nice and warm 447 | the woman who works in the deli is nice 448 | will always be back to kfc- they essentially stole from me ! 
449 | there are not much better places for breakfast 450 | older waitress was beautiful 451 | it 's very nice 452 | it is good so it does not need be replaced 453 | great place , best food 454 | i will always back there 455 | fees are quite good 456 | anything they say , you don 's neeed to asnk . 457 | this is authentic mexican cuisine 458 | the polite lady was checking us out 459 | built in cabinets in kitchen and bathroom along with sink are fine . 460 | good service and good visit 461 | i do not need to wash it myself 462 | she pay a couple of dishes and feel satisfied 463 | groupon have consistent good prices 464 | the green enchiladas were ok and great . 465 | the problem is solved well 466 | the avail is good . 467 | unicersity is also almost as wonderful as living on king 468 | fantastic job on nails and toes 469 | i would like to purchase any of the pictures based on high quality 470 | the bread is offered at a perfct charge 471 | simply , there are no superior places to go for sushi like this one 472 | took my suite to get fitted and a nice button put on it . 473 | pizza delivery is very prompt 474 | that person get his meal quickly 475 | that place is big , i 'm sure she could have figured it out 476 | they take care of their customers 477 | special desert 478 | a massae with my manicure or pedicure 479 | i called at 6:30 and got the good brush off . 480 | however , this experience went pertty well 481 | good atmosphere here 482 | the scorpions are always amazing 483 | she was getting happy and so were we 484 | they cared about their job much 485 | would have rated higher if i could 486 | very warm and personalized 487 | everyone can have the place to sit 488 | i like to size of the dance floor very much 489 | location is good and can hire more staff 490 | i guess starbucks customer service is good at many locations 491 | their cheese are good ! 492 | junior high people are amazing as well as asu 493 | three women met for a nice lunch last friday at elements at the sanctuary . 
494 | you are good to me 495 | it really please me 496 | a good guy did my pedicure 497 | it tastes really nice 498 | yes we are going to have a tournament today 499 | the price is very reasonable 500 | they stock lots of common parts -------------------------------------------------------------------------------- /generation_model/amazon/classifier/dis_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import sys 6 | 7 | sys.path.insert(0, "/DATA/joosung/fairseq_master") 8 | 9 | 10 | class findattribute(nn.Module): 11 | def __init__(self, drop_rate=0, gpu = True): 12 | super(findattribute, self).__init__() 13 | self.gpu = gpu 14 | 15 | self.n_vocab = 50259 16 | self.emb_dim = 256 17 | self.drop_rate = drop_rate 18 | 19 | """idx & length""" 20 | self.EOS_IDX = 50256 21 | self.START_IDX = 50257 22 | self.PAD_IDX = 50258 23 | 24 | """Discriminator(classifier)""" 25 | self.word_dim = 256 26 | self.word_emb = nn.Embedding(self.n_vocab, self.word_dim, self.PAD_IDX) # 50265x1024 27 | 28 | self.channel_out = 100 29 | self.conv2d_2 = nn.Conv2d(1,self.channel_out,(2,self.word_dim)) 30 | self.conv2d_3 = nn.Conv2d(1,self.channel_out,(3,self.word_dim)) 31 | self.conv2d_4 = nn.Conv2d(1,self.channel_out,(4,self.word_dim)) 32 | self.conv2d_5 = nn.Conv2d(1,self.channel_out,(5,self.word_dim)) 33 | self.fc_drop = nn.Dropout(self.drop_rate) 34 | self.disc_fc = nn.Linear(4*self.channel_out, 2) 35 | 36 | """parameters""" 37 | self.cls_params = list(self.word_emb.parameters())+list(self.conv2d_2.parameters())+list(self.conv2d_3.parameters())+list(self.conv2d_4.parameters())+\ 38 | list(self.conv2d_5.parameters())+list(self.disc_fc.parameters()) 39 | 40 | 41 | def discriminator(self, token_idx): 42 | """ 43 | token_idx: (batch, seq_len) 44 | """ 45 | if token_idx.shape[1] < 5: 46 | padding_size = 5-token_idx.shape[1] 47 | padding_token = [] 48 | for k in range(token_idx.shape[0]): 49 | temp = [] 50 | for i in range(padding_size): 51 | temp.append(self.PAD_IDX) 52 | padding_token.append(temp) 53 | padding_token=torch.from_numpy(np.array(padding_token)) 54 | if self.gpu == True: 55 | padding_token = padding_token.cuda() 56 | token_idx=torch.cat([token_idx,padding_token], 1) # (batch, seq_len+padding) = (batch, 5) 57 | 58 | word_emb = self.word_emb(token_idx) # (batch, seq_len, word_dim) 59 | word_2d = word_emb.unsqueeze(1) # (batch, 1, seq_len, word_dim) 60 | 61 | x2 = F.relu(self.conv2d_2(word_2d)).squeeze(3) # bi-gram, (batch, channel_out, seq_len-1) 62 | x3 = F.relu(self.conv2d_3(word_2d)).squeeze(3) # 3-gram, (batch, channel_out, seq_len-2) 63 | x4 = F.relu(self.conv2d_4(word_2d)).squeeze(3) # 4-gram, (batch, channel_out, seq_len-3) 64 | x5 = F.relu(self.conv2d_5(word_2d)).squeeze(3) # 5-gram, (batch, channel_out, seq_len-4) 65 | 66 | # Max-over-time-pool 67 | x2 = F.max_pool1d(x2, x2.size(2)).squeeze(2) # (batch, channel_out) 68 | x3 = F.max_pool1d(x3, x3.size(2)).squeeze(2) # (batch, channel_out) 69 | x4 = F.max_pool1d(x4, x4.size(2)).squeeze(2) # (batch, channel_out) 70 | x5 = F.max_pool1d(x5, x5.size(2)).squeeze(2) # (batch, channel_out) 71 | x = torch.cat([x2, x3, x4, x5], dim=1) # (batch, 
channel_out*4) 72 | 73 | x_drop = self.fc_drop(x) 74 | y = self.disc_fc(x_drop) # (batch, 2) 75 | 76 | if self.gpu == True: 77 | return y.cuda() 78 | else: 79 | return y 80 | 81 | 82 | ## inference 83 | def gen_discriminator(self, gen_out): 84 | """ 85 | gen_out: (gen_len+2, batch, n_vocab) 86 | """ 87 | gen_emb = gen_out[1:-1,:,:] # (gen_len, batch, n_vocab) 88 | gen_emb = torch.bmm(gen_emb, self.word_emb.weight.repeat(gen_emb.shape[0],1,1)) 89 | # (gen_len, batch, emb_dim) = (gen_len, batch, n_vocab) x (gen_len, n_vocab, emb_dim) 90 | gen_emb = gen_emb.transpose(0, 1) # (batch, gen_len, word_dim) 91 | 92 | if gen_emb.shape[1] < 5: 93 | padding_size = 5-gen_emb.shape[1] 94 | padding_token = [] 95 | for k in range(gen_emb.shape[0]): 96 | temp = [] 97 | for i in range(padding_size): 98 | temp.append(self.PAD_IDX) 99 | padding_token.append(temp) 100 | padding_token=torch.from_numpy(np.array(padding_token)) # (batch, padding_len) 101 | if self.gpu == True: 102 | padding_token = padding_token.cuda() 103 | padding_emb = self.word_emb(padding_token) # (batch, padding_len, emb_dim) 104 | gen_emb = torch.cat([gen_emb, padding_emb], 1) # (batch, 5, emb_dim) 105 | 106 | word_2d = gen_emb.unsqueeze(1) # (batch, 1, seq_len, word_dim) 107 | 108 | x2 = F.relu(self.conv2d_2(word_2d)).squeeze(3) # bi-gram, (batch, channel_out, seq_len-1) 109 | x3 = F.relu(self.conv2d_3(word_2d)).squeeze(3) # 3-gram, (batch, channel_out, seq_len-2) 110 | x4 = F.relu(self.conv2d_4(word_2d)).squeeze(3) # 4-gram, (batch, channel_out, seq_len-3) 111 | x5 = F.relu(self.conv2d_5(word_2d)).squeeze(3) # 5-gram, (batch, channel_out, seq_len-4) 112 | 113 | # Max-over-time-pool 114 | x2 = F.max_pool1d(x2, x2.size(2)).squeeze(2) # (batch, channel_out) 115 | x3 = F.max_pool1d(x3, x3.size(2)).squeeze(2) # (batch, channel_out) 116 | x4 = F.max_pool1d(x4, x4.size(2)).squeeze(2) # (batch, channel_out) 117 | x5 = F.max_pool1d(x5, x5.size(2)).squeeze(2) # (batch, channel_out) 118 | x = torch.cat([x2, x3, x4, x5], dim=1) # (batch, channel_out*4) 119 | 120 | y = self.disc_fc(x) # (batch, 2) 121 | 122 | if self.gpu == True: 123 | return y.cuda() 124 | else: 125 | return y 126 | 127 | def att_prob(self, token_idx, sentiment): 128 | """ 129 | token_idx: (batch, seq_len) 130 | """ 131 | token_list = token_idx.squeeze(0).cpu().tolist() # list 132 | min_prob = 1 133 | for i in range(len(token_list)): 134 | del_list = token_list[:i] + token_list[i+1:] 135 | del_tensor = torch.from_numpy(np.asarray(del_list)).unsqueeze(0).cuda() 136 | del_prob=F.softmax(self.discriminator(del_tensor),1).squeeze(0)[sentiment].cpu().detach().numpy().item() 137 | 138 | if del_prob <= min_prob: 139 | max_ind = i 140 | min_prob = del_prob 141 | 142 | final_list = token_list[:max_ind] + token_list[max_ind+1:] 143 | del_idx = torch.from_numpy(np.asarray(final_list)).unsqueeze(0).cuda() 144 | return del_idx 145 | 146 | def cls_loss(self, targets, cls_out): 147 | """ 148 | targets: (batch, 2) / attributes [0,1] or [1,0] 149 | cls_out: (batch, 2) (logits) 150 | """ 151 | 152 | final_targets = targets.argmax(1) # (batch) 153 | cls_loss = F.cross_entropy(cls_out, final_targets) 154 | 155 | if self.gpu == True: 156 | return cls_loss.cuda() 157 | else: 158 | return cls_loss 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /generation_model/amazon/classifier/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import 
numpy as np 4 | 5 | from tqdm import tqdm 6 | import os 7 | import random 8 | 9 | from transformers import * 10 | gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 11 | from tqdm import tqdm 12 | import json 13 | 14 | 15 | ## initialization 16 | from dis_model import * 17 | dismodel = findattribute(drop_rate = 0.4).cuda() 18 | dismodel.load_state_dict(torch.load('../visual_v1_0/models/cls_model_final')) 19 | dismodel.train() 20 | 21 | import torch.optim as optim 22 | 23 | from tensorboardX import SummaryWriter 24 | summary = SummaryWriter(logdir='./logs') 25 | 26 | def main(): 27 | f = open('amazon_vocab.json') 28 | token2num = json.load(f) 29 | 30 | num2token = {} 31 | for key, value in token2num.items(): 32 | num2token[value] = key 33 | f.close() 34 | 35 | data_path = "/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data" 36 | train_amazon_neg_path = data_path + "/amazon/sentiment.train.0" 37 | train_amazon_neg_open = open(train_amazon_neg_path, "r") 38 | train_amazon_neg_dataset = train_amazon_neg_open.readlines() 39 | dev_amazon_neg_path = data_path + "/amazon/sentiment.dev.0" 40 | dev_amazon_neg_open = open(dev_amazon_neg_path, "r") 41 | dev_amazon_neg_dataset = dev_amazon_neg_open.readlines() 42 | amazon_neg_dataset = train_amazon_neg_dataset+dev_amazon_neg_dataset 43 | 44 | neg_len = len(amazon_neg_dataset) 45 | train_amazon_neg_open.close() 46 | dev_amazon_neg_open.close() 47 | 48 | train_amazon_pos_path = data_path + "/amazon/sentiment.train.1" 49 | train_amazon_pos_open = open(train_amazon_pos_path, "r") 50 | train_amazon_pos_dataset = train_amazon_pos_open.readlines() 51 | dev_amazon_pos_path = data_path + "/amazon/sentiment.dev.1" 52 | dev_amazon_pos_open = open(dev_amazon_pos_path, "r") 53 | dev_amazon_pos_dataset = dev_amazon_pos_open.readlines() 54 | amazon_pos_dataset = train_amazon_pos_dataset+dev_amazon_pos_dataset 55 | 56 | pos_len = len(amazon_pos_dataset) 57 | train_amazon_pos_open.close() 58 | dev_amazon_pos_open.close() 59 | 60 | """training parameter""" 61 | cls_initial_lr = 0.001 62 | cls_trainer = optim.Adamax(dismodel.cls_params, lr=cls_initial_lr) # initial 0.001 63 | max_grad_norm = 25 64 | batch = 1 65 | epoch = 6 66 | stop_point = pos_len*epoch 67 | 68 | pre_epoch = 0 69 | for start in tqdm(range(0, stop_point)): 70 | """data start point""" 71 | neg_start = start%neg_len 72 | pos_start = start%pos_len 73 | 74 | """data setting""" 75 | neg_sentence = amazon_neg_dataset[neg_start].strip() 76 | pos_sentence = amazon_pos_dataset[pos_start].strip() 77 | 78 | neg_labels = [] # negative labels 79 | neg_labels.append([1,0]) 80 | neg_attribute = torch.from_numpy(np.asarray(neg_labels)).type(torch.FloatTensor).cuda() 81 | 82 | pos_labels = [] # positive labels 83 | pos_labels.append([0,1]) 84 | pos_attribute = torch.from_numpy(np.asarray(pos_labels)).type(torch.FloatTensor).cuda() 85 | 86 | sentences = [neg_sentence, pos_sentence] 87 | attributes = [neg_attribute, pos_attribute] 88 | 89 | """data input""" 90 | for i in range(2): 91 | # i=0: negative, i=1: positive 92 | sentence = sentences[i] 93 | attribute = attributes[i] # classifier target 94 | 95 | token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda() 96 | 97 | dis_out = dismodel.discriminator(token_idx) 98 | 99 | """calculation loss & training""" 100 | # training using discriminator loss 101 | cls_loss = dismodel.cls_loss(attribute, dis_out) 102 | summary.add_scalar('discriminator loss', cls_loss.item(), start) 103 | 104 | cls_trainer.zero_grad() 105 | cls_loss.backward() # retain_graph=True 106 | grad_norm = torch.nn.utils.clip_grad_norm_(dismodel.cls_params, max_grad_norm) 107 | cls_trainer.step() 108 | 109 | """saving point""" 110 | if (start+1)%pos_len == 0: 111 | random.shuffle(amazon_neg_dataset) 112 | random.shuffle(amazon_pos_dataset) 113 | save_model((start+1)//pos_len) 114 | save_model('final') # final_model 115 | 116 | 117 | def save_model(iter): 118 | if not os.path.exists('models/'): 119 | os.makedirs('models/') 120 | torch.save(dismodel.state_dict(), 'models/cls_model_{}'.format(iter)) 121 | 122 | 123 | if __name__ == '__main__': 124 | torch.cuda.empty_cache() 125 | main() 126 | 127 | 
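The classifier saved above (models/cls_model_final) is what the generation stage queries when deleting attribute tokens. A minimal usage sketch, assuming the checkpoint exists, a GPU is available, and the script is run from this classifier directory; the example sentence is illustrative, not from the repository:

import torch
import torch.nn.functional as F
from transformers import GPT2Tokenizer
from dis_model import findattribute

gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
dismodel = findattribute().cuda()
dismodel.load_state_dict(torch.load('models/cls_model_final'))  # assumed checkpoint name, per save_model() above
dismodel.eval()

# score one sentence; logit index 0 = negative, index 1 = positive
sentence = "the food is delicious ."
token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda()
with torch.no_grad():
    probs = F.softmax(dismodel.discriminator(token_idx), dim=1).squeeze(0)
print('P(positive) =', probs[1].item())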
-------------------------------------------------------------------------------- /generation_model/amazon/gen_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | logger = logging.getLogger() 3 | logger.setLevel("ERROR") 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import sys 10 | import math 11 | from transformers import * 12 | 13 | sys.path.insert(0, "/DATA/joosung/fairseq_master") 14 | 15 | import json 16 | f = open('amazon_vocab.json') 17 | token2num = json.load(f) 18 | 19 | num2token = {} 20 | for key, value in token2num.items(): 21 | num2token[value] = key 22 | 23 | class PositionalEncoding(nn.Module): 24 | def __init__(self, d_model, dropout=0.1, max_len=5000): 25 | super(PositionalEncoding, self).__init__() 26 | self.dropout = nn.Dropout(p=dropout) 27 | 28 | pe = torch.zeros(max_len, d_model) 29 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 30 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 31 | pe[:, 0::2] = torch.sin(position * div_term) 32 | pe[:, 1::2] = torch.cos(position * div_term) 33 | pe = pe.unsqueeze(0).transpose(0, 1) 34 | self.register_buffer('pe', pe) 35 | 36 | def forward(self, x): 37 | x = x + self.pe[:x.size(0), :] 38 | return self.dropout(x) 39 | 40 | class styletransfer(nn.Module): 41 | def __init__(self, drop_rate=0, gpu = True): 42 | super(styletransfer, self).__init__() 43 | self.gpu = gpu 44 | self.gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 45 | 46 | """hyper parameters""" 47 | self.n_vocab = 50259 48 | self.emb_dim = 256 49 | self.nhead = 4 50 | self.num_layers = 3 51 | 52 | """idx & length""" 53 | self.START_IDX = 50257 54 | self.PAD_IDX = 50258 55 | self.EOS_IDX = 50256 56 | self.MAX_SENT_LEN = 10 57 | 58 | """attribute matrix""" 59 | ## one_hot encoding 60 | self.att_num = 2 61 | self.matrix_A = nn.Linear(self.att_num, self.emb_dim) 62 | 63 | """word embedding""" 64 | self.emb_matrix = nn.Embedding(self.n_vocab, self.emb_dim, self.PAD_IDX) # 50259 x 256 65 | 66 | """Position embedding""" 67 | self.pos_encoder = PositionalEncoding(self.emb_dim) 68 | 69 | """Encoder""" 70 | self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.emb_dim, nhead=self.nhead) 71 | self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=self.num_layers) 72 | 73 | """Decoder""" 74 | self.decoder_layer = nn.TransformerDecoderLayer(d_model=self.emb_dim, nhead=self.nhead) 75 | self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=self.num_layers) 76 | self.matrix_D = nn.Linear(self.emb_dim, self.n_vocab) # emb_dim -> n_vocab 77 | 78 | """parameters""" 79 | self.enc_params = list(self.encoder_layer.parameters())+list(self.transformer_encoder.parameters()) 80 | self.dec_params = list(self.decoder_layer.parameters())+list(self.transformer_decoder.parameters())+list(self.matrix_D.parameters()) 81 | self.aed_params = list(self.emb_matrix.parameters())+self.enc_params+self.dec_params 82 | 83 | """Modeling""" 84 | def encoder(self, enc_input): 85 | """ 86 | enc_input: (batch, enc_len) 87 | """ 88 | word_emb = self.emb_matrix(enc_input) # (batch, enc_len, emb_dim) 89 | word_emb = word_emb.transpose(0, 1) # (enc_len, batch, emb_dim) 90 | word_pos = self.pos_encoder(word_emb) # (enc_len, batch, emb_dim) 91 | out_enc = self.transformer_encoder(word_pos) # (enc_len, batch, emb_dim) 92 | 93 | return out_enc 94 | 95 | def decoder(self, enc_out, dec_input, attribute): 96 | """ 97 | enc_out: (enc_len, batch, emb_dim) 98 | dec_input: (batch, dec_len) 99 | attribute: (batch, 2) 100 | """ 101 | att_emb = self.matrix_A(attribute).unsqueeze(0) # (1, batch, emb_dim) 102 | 103 | word_emb = self.emb_matrix(dec_input) # (batch, dec_len, emb_dim) 104 | word_emb = word_emb.transpose(0, 1) # (dec_len, batch, emb_dim) 105 | word_pos = self.pos_encoder(word_emb) # (dec_len, batch, emb_dim) 106 | 107 | start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda()) # (emb_dim) 108 | start_token = start_token.repeat(1, dec_input.shape[0], 1) # (1, batch, emb_dim) 109 | style_dec_input = torch.cat([att_emb, start_token, word_pos], 0) # (dec_len+2, batch, emb_dim) w/ [att], [start] 110 | 111 | tgt_mask = self.generate_square_subsequent_mask(style_dec_input.shape[0]).cuda() # (dec_len+2, dec_len+2) 112 | 113 | dec_out = self.transformer_decoder(style_dec_input, enc_out, tgt_mask=tgt_mask) # (dec_len+2, batch, emb_dim) 114 | vocab_out = self.matrix_D(dec_out) # (dec_len+2, batch, n_vocab) 115 | return dec_out, vocab_out 116 | 
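# Note on the teacher-forced decoder above: along the time axis its input is
# [attribute emb, [start] emb, gold token 1, ..., gold token T], and recon_loss()
# below aligns vocab_out[1:] with [gold token 1, ..., gold token T, [EOS]], so the
# prediction made at the [start] slot is scored against the first gold token.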
117 | def generator(self, enc_out, gen_len, attribute): 118 | """ 119 | enc_out: (enc_len, batch, emb_dim) 120 | attribute: (batch, 2) 121 | gen_len: len(dec_in)+1 122 | """ 123 | # initialization because there is no first token yet 124 | batch = enc_out.shape[1] 125 | att_emb = self.matrix_A(attribute).unsqueeze(0) # (1, batch, emb_dim) 126 | start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda()) # (emb_dim) 127 | start_token = start_token.repeat(1, batch, 1) # (1, batch, emb_dim) 128 | gen_input = torch.cat([att_emb, start_token], 0) # (2, batch, emb_dim) w/ [att], [start] 129 | 130 | for i in range(gen_len): 131 | tgt_mask = self.generate_square_subsequent_mask(gen_input.shape[0]).cuda() # (pre_gen_len, pre_gen_len) 132 | dec_out = self.transformer_decoder(gen_input, enc_out, tgt_mask=tgt_mask) # (pre_gen_len, batch, emb_dim) 133 | vocab_out = self.matrix_D(dec_out) # (pre_gen_len, batch, n_vocab) 134 | 135 | vocab_idx = vocab_out.argmax(2) # (pre_gen_len, batch) 136 | vocab_idx = vocab_idx.transpose(0, 1) # (batch, pre_gen_len) 137 | 138 | new_word_emb = self.emb_matrix(vocab_idx) # (batch, pre_gen_len, emb_dim) 139 | new_word_emb = new_word_emb.transpose(0, 1) # (pre_gen_len, batch, emb_dim) 140 | # gen_emb = torch.bmm(vocab_out, self.emb_matrix.weight.repeat(vocab_out.shape[0],1,1)) 141 | 142 | # word_pos = self.pos_encoder(word_emb) # (enc_len, batch, emb_dim) 143 | gen_input = torch.cat([gen_input, new_word_emb[-1:,:,:]]) # (pre_gen_len+1, batch, word_dim), pre_gen_len+=1 144 | 145 | return vocab_out # (gen_len+2, batch, n_vocab) 146 | 147 | def generate_square_subsequent_mask(self, sz): # sz: target length 148 | mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) 149 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) 150 | return mask 151 | 
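# For example, generate_square_subsequent_mask(3) returns
# [[0., -inf, -inf],
#  [0.,   0., -inf],
#  [0.,   0.,   0.]]
# so position i may only attend to positions <= i, which keeps decoding causal
# for both the teacher-forced decoder and the step-by-step generator above.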
152 | """calculation loss""" 153 | def recon_loss(self, dec_input, vocab_out): 154 | """ 155 | dec_input: (batch, dec_len) 156 | vocab_out: (dec_len+2, batch, n_vocab) with [att], [start] 157 | """ 158 | end_token = torch.tensor(self.EOS_IDX).cuda() # (1) 159 | end_token = end_token.repeat(dec_input.shape[0], 1) # (batch, 1) 160 | target_tokens = torch.cat([dec_input, end_token], 1) # (batch, dec_len+1) w/ [EOS] 161 | 162 | pred_out = vocab_out[1:,:,:] # (dec_len+1, batch, n_vocab) 163 | pred_out = pred_out.permute(1,0,2) # (batch, dec_len+1, n_vocab) 164 | 165 | target_tokens = target_tokens.contiguous() # (batch, dec_len+1) 166 | pred_out = pred_out.contiguous() # (batch, dec_len+1, n_vocab) 167 | 168 | target_tokens = target_tokens.view(-1) # (batch*(dec_len+1)) 169 | pred_out = pred_out.view(-1, pred_out.shape[2]) # (batch*(dec_len+1), n_vocab) 170 | 171 | recon_loss = F.cross_entropy(pred_out, target_tokens) 172 | 173 | return recon_loss 174 | 175 | def cls_loss(self, attributes, cls_out): 176 | """ 177 | attributes: [0,1] or [1,0] 178 | cls_out: (batch, 2) (logits) 179 | """ 180 | targets = attributes.argmax(1) # (batch) 181 | cls_loss = F.cross_entropy(cls_out, targets) 182 | 183 | if self.gpu == True: 184 | return cls_loss.cuda() 185 | else: 186 | return cls_loss 187 | 188 | """inference""" 189 | def dec2sen(self, vocab_out): 190 | """ 191 | vocab_out: (dec_len+2, batch, n_vocab) with att, start 192 | """ 193 | pred_out = vocab_out[1:,:,:] # (dec_len+1, batch, n_vocab) with [END] 194 | pred_idx = torch.argmax(pred_out, 2) # (dec_len+1, batch) 195 | pred_idx = pred_idx.squeeze(1) # (dec_len+1) because of batch=1 196 | 197 | token_list = [] 198 | dec_sen = '' 199 | for i in range(len(pred_idx)): 200 | token = num2token[pred_idx[i].cpu().numpy().item()] 201 | token_list.append(token) 202 | 203 | if 'Ġ' in token: 204 | token = token.strip('Ġ') 205 | dec_sen += ' ' 206 | dec_sen += token 207 | else: 208 | dec_sen += token 209 | dec_sen = dec_sen.strip() 210 | 211 | 212 | return token_list, dec_sen 213 | 214 | def generated_sentence(self, enc_out, attribute, ori_length): 215 | """ 216 | enc_out: (enc_len, batch, emb_dim) 217 | attribute: (batch, 2) 218 | ori_length: token length of the original input sentence 219 | """ 220 | batch = enc_out.shape[1] 221 | # max_len = enc_out.shape[0]+3 222 | max_len = ori_length+5 223 | 224 | # initialization because there is no first token yet 225 | att_emb = self.matrix_A(attribute).unsqueeze(0) # (1, batch, emb_dim) 226 | start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda()) # (emb_dim) 227 | start_token = start_token.repeat(1, batch, 1) # (1, batch, emb_dim) 228 | gen_input = torch.cat([att_emb, start_token], 0) # (2, batch, emb_dim) w/ [att], [start] 229 | 230 | tgt_mask = self.generate_square_subsequent_mask(gen_input.shape[0]).cuda() # (2, 2) 231 | 232 | dec_out = self.transformer_decoder(gen_input, enc_out, tgt_mask=tgt_mask) # (2, batch, emb_dim) 233 | vocab_out = self.matrix_D(dec_out) # (2, batch, n_vocab) 234 | _, dec_sen = self.dec2sen(vocab_out) 235 | 236 | gen_vocab_out = [] 237 | for i in range(max_len): 238 | if len(dec_sen) == 0: 239 | token_idx = torch.tensor([220]).unsqueeze(0).cuda() # (batch, gen_len) 240 | else: 241 | token_idx = torch.tensor(self.gpt_tokenizer.encode(dec_sen)).unsqueeze(0).cuda() # (batch, gen_len) 242 | if self.EOS_IDX in token_idx: 243 | break 244 | 245 | dec_out, vocab_out = self.decoder(enc_out, token_idx, attribute) # (dec_len+2, batch, emb_dim), (dec_len+2, batch, n_vocab) 246 | dec_tokens, dec_sen = self.dec2sen(vocab_out) 247 | 248 | return dec_sen 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 
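Taken together, the pieces above give one full transfer step: delete attribute tokens with the classifier, encode what remains, and decode with the target attribute. A minimal sketch of that flow, assuming trained checkpoints and a GPU; the checkpoint names and the example sentence are illustrative assumptions, and the same delete loop appears in train.py below:

import torch
import torch.nn.functional as F
from transformers import GPT2Tokenizer
from gen_model import styletransfer
from dis_model import findattribute

gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
genmodel = styletransfer().cuda()
genmodel.load_state_dict(torch.load('models/gen_model_final'))  # assumed checkpoint
genmodel.eval()
dismodel = findattribute().cuda()
dismodel.load_state_dict(torch.load('classifier/models/cls_model_final'))  # assumed checkpoint
dismodel.eval()

sentence = "the service was slow and rude ."  # illustrative negative input
token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda()
ori_length = token_idx.shape[1]

# step 1: delete attribute tokens until the source sentiment is no longer confident
sentiment = dismodel.discriminator(token_idx).argmax(1).cpu().item()
del_idx = token_idx
for _ in range(int(ori_length/2)):
    del_idx = dismodel.att_prob(del_idx, sentiment)
    sent_prob = F.softmax(dismodel.discriminator(del_idx), 1).squeeze(0)[sentiment].item()
    if sent_prob < 0.7:
        break

# step 2: encode the content tokens and decode with the opposite attribute ([0,1] = positive)
fake_attribute = torch.tensor([[0., 1.]]).cuda()
enc_out = genmodel.encoder(del_idx)
print(genmodel.generated_sentence(enc_out, fake_attribute, ori_length))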
214 | def generated_sentence(self, enc_out, attribute, ori_length): 215 | """ 216 | enc_out: (enc_len, batch, emb_dim) 217 | attribute: (batch, 2) 218 | ori_length: length of the original token sequence 219 | """ 220 | batch = enc_out.shape[1] 221 | # max_len = enc_out.shape[0]+3 222 | max_len = ori_length+5 223 | 224 | # initialization because there is no first token yet 225 | att_emb = self.matrix_A(attribute).unsqueeze(0) # (1, batch, emb_dim) 226 | start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda()) # (emb_dim) 227 | start_token = start_token.repeat(1, batch, 1) # (1, batch, emb_dim) 228 | gen_input = torch.cat([att_emb, start_token], 0) # (2, batch, emb_dim) w/ [att], [start] 229 | 230 | tgt_mask = self.generate_square_subsequent_mask(gen_input.shape[0]).cuda() # (2, 2) 231 | 232 | dec_out = self.transformer_decoder(gen_input, enc_out, tgt_mask=tgt_mask) # (2, batch, emb_dim) 233 | vocab_out = self.matrix_D(dec_out) # (2, batch, n_vocab) 234 | _, dec_sen = self.dec2sen(vocab_out) 235 | 236 | gen_vocab_out = [] 237 | for i in range(max_len): 238 | if len(dec_sen) == 0: 239 | token_idx = torch.tensor([220]).unsqueeze(0).cuda() # (batch, gen_len); 220 is the bare-space token 240 | else: 241 | token_idx = torch.tensor(self.gpt_tokenizer.encode(dec_sen)).unsqueeze(0).cuda() # (batch, gen_len) 242 | if self.EOS_IDX in token_idx: 243 | break 244 | 245 | dec_out, vocab_out = self.decoder(enc_out, token_idx, attribute) # (dec_len+2, batch, emb_dim), (dec_len+2, batch, n_vocab) 246 | dec_tokens, dec_sen = self.dec2sen(vocab_out) 247 | 248 | return dec_sen 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | -------------------------------------------------------------------------------- /generation_model/amazon/train.py: -------------------------------------------------------------------------------- 1 | import logging 2 | logger = logging.getLogger() 3 | logger.setLevel("ERROR") 4 | 5 | import torch 6 | import torch.nn as nn 7 | import numpy as np 8 | 9 | from tqdm import tqdm 10 | import os 11 | import random 12 | 13 | from transformers import * 14 | gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 15 | from tqdm import tqdm 16 | import json 17 | 18 | 19 | ## initialization 20 | from gen_model import * 21 | genmodel = styletransfer().cuda() 22 | genmodel.load_state_dict(torch.load('../ST_v2.0/models/gen_model_5')) 23 | genmodel.train() 24 | 25 | import sys 26 | sys.path.insert(0, "/DATA/joosung/controllable_english/amazon/classifier/") 27 | from dis_model import * 28 | dismodel = findattribute().cuda() 29 | dismodel_name='cls_model_6' 30 | dismodel.load_state_dict(torch.load('../classifier/models/{}'.format(dismodel_name))) 31 | dismodel.eval() 32 | 33 | import torch.optim as optim 34 | 35 | from tensorboardX import SummaryWriter 36 | summary = SummaryWriter(logdir='./logs') 37 | 38 | def main(): 39 | f = open('amazon_vocab.json') 40 | token2num = json.load(f) 41 | 42 | num2token = {} 43 | for key, value in token2num.items(): 44 | num2token[value] = key 45 | f.close() 46 | 47 | data_path = "/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data" 48 | train_amazon_neg_path = data_path + "/amazon/sentiment.train.0" 49 | train_amazon_neg_open = open(train_amazon_neg_path, "r") 50 | train_amazon_neg_dataset = train_amazon_neg_open.readlines() 51 | dev_amazon_neg_path = data_path + "/amazon/sentiment.dev.0" 52 | dev_amazon_neg_open = open(dev_amazon_neg_path, "r") 53 | dev_amazon_neg_dataset = dev_amazon_neg_open.readlines() 54 | amazon_neg_dataset = train_amazon_neg_dataset+dev_amazon_neg_dataset 55 | 
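The block above folds the dev split into the training pool for each sentiment. A compact, hypothetical helper that is equivalent to this loading boilerplate (the function name is ours; the file layout matches the repo's data directory):

```python
def load_sentences(data_path, domain, label):
    """Read and merge the train/dev splits for one sentiment label (0 or 1)."""
    sentences = []
    for split in ("train", "dev"):
        with open("{}/{}/sentiment.{}.{}".format(data_path, domain, split, label)) as f:
            sentences.extend(line.strip() for line in f)
    return sentences

# amazon_neg_dataset = load_sentences(data_path, "amazon", 0)
# amazon_pos_dataset = load_sentences(data_path, "amazon", 1)
```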
56 | neg_len = len(amazon_neg_dataset) 57 | train_amazon_neg_open.close() 58 | dev_amazon_neg_open.close() 59 | 60 | train_amazon_pos_path = data_path + "/amazon/sentiment.train.1" 61 | train_amazon_pos_open = open(train_amazon_pos_path, "r") 62 | train_amazon_pos_dataset = train_amazon_pos_open.readlines() 63 | dev_amazon_pos_path = data_path + "/amazon/sentiment.dev.1" 64 | dev_amazon_pos_open = open(dev_amazon_pos_path, "r") 65 | dev_amazon_pos_dataset = dev_amazon_pos_open.readlines() 66 | amazon_pos_dataset = train_amazon_pos_dataset+dev_amazon_pos_dataset 67 | 68 | pos_len = len(amazon_pos_dataset) 69 | train_amazon_pos_open.close() 70 | dev_amazon_pos_open.close() 71 | 72 | """training parameter""" 73 | aed_initial_lr = 0.00001 74 | gen_initial_lr = 0.001 75 | aed_trainer = optim.Adamax(genmodel.aed_params, lr=aed_initial_lr) # initial 0.0005 76 | gen_trainer = optim.Adamax(genmodel.aed_params, lr=gen_initial_lr) # initial 0.0001 77 | max_grad_norm = 10 78 | batch = 1 79 | epoch = 6 80 | epoch_len = max(pos_len,neg_len) 81 | stop_point = epoch_len*epoch 82 | 83 | pre_epoch = 0 84 | for start in tqdm(range(0, stop_point)): 85 | ## learning rate decay 86 | now_epoch = (start+1)//pos_len 87 | 88 | """data start point""" 89 | neg_start = start%neg_len 90 | pos_start = start%pos_len 91 | 92 | """data setting""" 93 | neg_sentence = amazon_neg_dataset[neg_start].strip() 94 | pos_sentence = amazon_pos_dataset[pos_start].strip() 95 | 96 | neg_labels = [] # negative labels 97 | neg_labels.append([1,0]) 98 | neg_attribute = torch.from_numpy(np.asarray(neg_labels)).type(torch.FloatTensor).cuda() 99 | 100 | pos_labels = [] # positive labels 101 | pos_labels.append([0,1]) 102 | pos_attribute = torch.from_numpy(np.asarray(pos_labels)).type(torch.FloatTensor).cuda() 103 | 104 | sentences = [neg_sentence, pos_sentence] 105 | attributes = [neg_attribute, pos_attribute] 106 | sentiments = [0, 1] 107 | 108 | """data input""" 109 | for i in range(2): 110 | # i=0: negative, i=1: positive 111 | sentence = sentences[i] 112 | attribute = attributes[i] # for decoder 113 | fake_attribute = attributes[abs(1-i)] # for generate 114 | # sentiment = sentiments[i] # for delete 115 | 116 | token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda() 117 | 118 | # delete model 119 | max_len = int(token_idx.shape[1]/2) 120 | dis_out = dismodel.discriminator(token_idx) 121 | sentiment = dis_out.argmax(1).cpu().item() ## changed: the predicted sentiment drives the delete step 122 | 123 | del_idx = token_idx 124 | for k in range(max_len): 125 | del_idx = dismodel.att_prob(del_idx, sentiment) 126 | dis_out = dismodel.discriminator(del_idx) 127 | sent_prob = F.softmax(dis_out, 1).squeeze(0)[sentiment].cpu().detach().numpy().item() 128 | if sent_prob < 0.7: 129 | break 130 | 131 | """auto-encoder loss & training""" 132 | # training using discriminator loss 133 | enc_out = genmodel.encoder(del_idx) 134 | dec_out, vocab_out = genmodel.decoder(enc_out, token_idx, attribute) 135 | 136 | ## calculation loss 137 | recon_loss = genmodel.recon_loss(token_idx, vocab_out) 138 | summary.add_scalar('reconstruction loss', recon_loss.item(), start) 139 | 140 | aed_trainer.zero_grad() 141 | recon_loss.backward(retain_graph=True) # retain_graph=True 142 | grad_norm = torch.nn.utils.clip_grad_norm_(genmodel.aed_params, max_grad_norm) 143 | aed_trainer.step() 144 | 
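Why `retain_graph=True` on the reconstruction backward: `vocab_out` from the same decoder forward pass is reused just below for the classifier loss, so the autograd graph must survive the first backward call. The pattern in isolation (toy tensors, not the model):

```python
import torch

x = torch.randn(3, requires_grad=True)
y = (x * 2).sum()        # shared forward computation
loss_a = y ** 2
loss_b = -y

loss_a.backward(retain_graph=True)  # keep the graph alive for the second loss
loss_b.backward()                   # the graph may be freed after this pass
```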
145 | """decoder classification loss & training""" 146 | ## calculation loss 147 | gen_cls_out = dismodel.gen_discriminator(vocab_out) 148 | 149 | ## loss w.r.t. the decoder's target attribute 150 | gen_cls_loss = genmodel.cls_loss(attribute, gen_cls_out) 151 | summary.add_scalar('generated sentence loss', gen_cls_loss.item(), start) 152 | 153 | gen_trainer.zero_grad() 154 | gen_cls_loss.backward() # retain_graph=True 155 | grad_norm = torch.nn.utils.clip_grad_norm_(genmodel.aed_params, max_grad_norm) 156 | gen_trainer.step() 157 | 158 | 159 | """saving point""" 160 | if (start+1)%epoch_len == 0: 161 | random.shuffle(amazon_neg_dataset) 162 | random.shuffle(amazon_pos_dataset) 163 | save_model((start+1)//pos_len) 164 | save_model('final') # final_model 165 | 166 | 167 | def save_model(iter): 168 | if not os.path.exists('models/'): 169 | os.makedirs('models/') 170 | torch.save(genmodel.state_dict(), 'models/gen_model_{}'.format(iter)) 171 | 172 | 173 | if __name__ == '__main__': 174 | torch.cuda.empty_cache() 175 | main() 176 | 177 | -------------------------------------------------------------------------------- /generation_model/inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "from transformers import *\n", 11 | "\n", 12 | "gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')\n", 13 | "\n", 14 | "from tqdm import tqdm" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import json\n", 24 | "f = open('gpt_yelp_vocab.json')\n", 25 | "token2num = json.load(f)\n", 26 | "\n", 27 | "num2token = {}\n", 28 | "for key, value in token2num.items():\n", 29 | " num2token[value] = key" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "findattribute(\n", 41 | " (word_emb): Embedding(50259, 256, padding_idx=50258)\n", 42 | " (conv2d_2): Conv2d(1, 100, kernel_size=(2, 256), stride=(1, 1))\n", 43 | " (conv2d_3): Conv2d(1, 100, kernel_size=(3, 256), stride=(1, 1))\n", 44 | " (conv2d_4): Conv2d(1, 100, kernel_size=(4, 256), stride=(1, 1))\n", 45 | " (conv2d_5): Conv2d(1, 100, kernel_size=(5, 256), stride=(1, 1))\n", 46 | " (disc_fc): Linear(in_features=400, out_features=2, bias=True)\n", 47 | ")" 48 | ] 49 | }, 50 | "execution_count": 3, 51 | "metadata": {}, 52 | "output_type": "execute_result" 53 | } 54 | ], 55 | "source": [ 56 | "import torch\n", 57 | "import numpy as np\n", 58 | "import torch.nn as nn\n", 59 | "import sys\n", 60 | "\n", 61 | "sys.path.insert(0, \"/DATA/joosung/controllable_english/yelp/classifier/\")\n", 62 | "from dis_model import *\n", 63 | "dismodel = findattribute().cuda()\n", 64 | "dismodel_name='cls_model_3'\n", 65 | "dismodel.load_state_dict(torch.load('../classifier/models/{}'.format(dismodel_name)))\n", 66 | "dismodel.eval()\n", 67 | "\n", 68 | "# from gen_model import *\n", 69 | "# genmodel = styletransfer().cuda()\n", 70 | "# genmodel_name='gen_model_3'\n", 71 | "# genmodel.load_state_dict(torch.load('./models/{}'.format(genmodel_name)))\n", 72 | "# genmodel.eval()\n", 73 | "# print('ok')" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "import tqdm\n", 83 | "data_path = \"/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data\"\n", 84 | "yelp_neg_path = data_path + \"/yelp/sentiment.test.0\"\n", 85 | "yelp_neg_open = open(yelp_neg_path, 
\"r\")\n", 86 | "yelp_neg_dataset = yelp_neg_open.readlines()\n", 87 | "neg_len = len(yelp_neg_dataset)\n", 88 | "yelp_neg_open.close()\n", 89 | "\n", 90 | "yelp_pos_path = data_path + \"/yelp/sentiment.test.1\"\n", 91 | "yelp_pos_open = open(yelp_pos_path, \"r\")\n", 92 | "yelp_pos_dataset = yelp_pos_open.readlines()\n", 93 | "pos_len = len(yelp_pos_dataset)\n", 94 | "yelp_pos_open.close()\n", 95 | "\n", 96 | "stop_point = 30\n", 97 | "# stop_point = pos_len*epoch+batch\n", 98 | "\n", 99 | "PAD_IDX = 50258\n", 100 | "\n", 101 | "for start in range(stop_point-1, stop_point):\n", 102 | " \"\"\"data start point\"\"\"\n", 103 | " neg_start = start%neg_len\n", 104 | " pos_start = start%pos_len\n", 105 | " \n", 106 | " \"\"\"data setting\"\"\"\n", 107 | " neg_sentence = yelp_neg_dataset[neg_start].strip()\n", 108 | " pos_sentence = yelp_pos_dataset[pos_start].strip() \n", 109 | " \n", 110 | " neg_labels = [] # negative labels\n", 111 | " neg_labels.append([1,0])\n", 112 | " neg_attribute = torch.from_numpy(np.asarray(neg_labels)).type(torch.FloatTensor).cuda()\n", 113 | "\n", 114 | " pos_labels = [] # positive labels\n", 115 | " pos_labels.append([0,1])\n", 116 | " pos_attribute = torch.from_numpy(np.asarray(pos_labels)).type(torch.FloatTensor).cuda()\n", 117 | "\n", 118 | " sentences = [neg_sentence, pos_sentence]\n", 119 | " attributes = [neg_attribute, pos_attribute]\n", 120 | " fake_attributes = [pos_attribute, neg_attribute]\n", 121 | " sentiments = [0, 1]\n", 122 | " \"\"\"data input\"\"\"\n", 123 | " for i in range(2):\n", 124 | " # k=0: negative, k=1: positive\n", 125 | " sentence = sentences[i] \n", 126 | " for k in range(6):\n", 127 | " fake_attribute = k/5*attributes[0] + (1-k/5)*attributes[1] \n", 128 | "# attribute = attributes[i] # for decoder\n", 129 | "# fake_attribute = attributes[abs(1-i)] # for generate \n", 130 | "\n", 131 | " token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda()\n", 132 | " ori_length = token_idx.shape[1]\n", 133 | "\n", 134 | " # delete model\n", 135 | " max_len = int(token_idx.shape[1]/2)\n", 136 | " sentiment = sentiments[i] # for delete\n", 137 | "# sentiment = dis_out.argmax(1).cpu().item() ## 변경점 for delete\n", 138 | "# dis_out = dismodel.discriminator(token_idx) \n", 139 | "\n", 140 | " del_idx = token_idx\n", 141 | " for k in range(max_len):\n", 142 | " del_idx = dismodel.att_prob(del_idx, sentiment) \n", 143 | " dis_out = dismodel.discriminator(del_idx) \n", 144 | " sent_porb = F.softmax(dis_out, 1).squeeze(0)[sentiment].cpu().detach().numpy().item()\n", 145 | " if sent_porb < 0.7:\n", 146 | " break \n", 147 | "\n", 148 | " del_list = del_idx.squeeze(0).cpu().tolist() # list\n", 149 | " del_sen =''\n", 150 | " for x in range(len(del_list)): \n", 151 | " token = num2token[del_list[x]].strip('Ġ')\n", 152 | " del_sen += token\n", 153 | " del_sen += ' '\n", 154 | " del_sen = del_sen.strip()\n", 155 | "\n", 156 | " del_percent = 100-(del_idx.shape[1])/(token_idx.shape[1]) * 100\n", 157 | "\n", 158 | " enc_out = genmodel.encoder(del_idx)\n", 159 | "# dec_out, vocab_out = genmodel.decoder(enc_out, token_idx, attribute)\n", 160 | "\n", 161 | "# dec_tokens, dec_sen = genmodel.dec2sen(vocab_out)\n", 162 | "\n", 163 | "# gen_sen_1 = genmodel.generated_sentence(enc_out, attribute, ori_length)\n", 164 | " gen_sen_2 = genmodel.generated_sentence(enc_out, fake_attribute, ori_length)\n", 165 | "\n", 166 | " print('Original Attribute: ', sentiment)\n", 167 | " print('Original Sentence: ', sentence)\n", 168 | " print('Delete Sentence: {}, 
{}%'.format(del_sen, del_percent))\n", 169 | "# print('Reconstruction(decoder) Sentence: ', dec_sen)\n", 170 | "# print('Reconstruction(generator) Sentence', sentiment, ': ', gen_sen_1.rstrip('<|endoftext|>')) \n", 171 | "# print('Style transfer(generator) Sentence', abs(1-sentiment), ': ', gen_sen_2.rstrip('<|endoftext|>'))\n", 172 | " print('Style transfer(generator) Sentence', fake_attribute.cpu().numpy().tolist()[0], ': ', gen_sen_2.rstrip('<|endoftext|>')) \n", 173 | " print('') \n", 174 | " \n", 175 | " \n" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 5, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "name": "stderr", 185 | "output_type": "stream", 186 | "text": [ 187 | "\n", 188 | " 0%| | 0/2 [00:00', '')\n", 321 | "\n", 322 | " if i == 0:\n", 323 | " f0.write(sentence+'\\t'+gen_sen_2+'\\t'+str(sentiment)+'\\n')\n", 324 | " if i == 1:\n", 325 | " f1.write(sentence+'\\t'+gen_sen_2+'\\t'+str(sentiment)+'\\n')\n", 326 | " f0.close()\n", 327 | " f1.close()\n", 328 | " \n", 329 | " \n" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "## specific model test data\n", 339 | "import torch\n", 340 | "import numpy as np\n", 341 | "import torch.nn as nn\n", 342 | "import sys\n", 343 | "\n", 344 | "sys.path.insert(0, \"/DATA/joosung/controllable_english/classifier/\")\n", 345 | "from dis_model import *\n", 346 | "dismodel = findattribute().cuda()\n", 347 | "dismodel_name='cls_model_3'\n", 348 | "dismodel.load_state_dict(torch.load('../classifier/models/{}'.format(dismodel_name)))\n", 349 | "dismodel.eval()\n", 350 | "\n", 351 | "from tqdm import tqdm\n", 352 | "from gen_model import *\n", 353 | "genmodel = styletransfer().cuda()\n", 354 | "\n", 355 | "data_path = \"/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data\"\n", 356 | "yelp_neg_path = data_path + \"/yelp/sentiment.test.0\"\n", 357 | "yelp_neg_open = open(yelp_neg_path, \"r\")\n", 358 | "yelp_neg_dataset = yelp_neg_open.readlines()\n", 359 | "neg_len = len(yelp_neg_dataset)\n", 360 | "yelp_neg_open.close()\n", 361 | "\n", 362 | "yelp_pos_path = data_path + \"/yelp/sentiment.test.1\"\n", 363 | "yelp_pos_open = open(yelp_pos_path, \"r\")\n", 364 | "yelp_pos_dataset = yelp_pos_open.readlines()\n", 365 | "pos_len = len(yelp_pos_dataset)\n", 366 | "yelp_pos_open.close()\n", 367 | "\n", 368 | "stop_point = pos_len\n", 369 | "\n", 370 | "PAD_IDX = 50258\n", 371 | "\n", 372 | "name_list = [1,2,3,4,5,6]\n", 373 | "for name in tqdm(range(len(name_list))):\n", 374 | " genmodel_name='gen_model_' + str(name_list[name])\n", 375 | " genmodel.load_state_dict(torch.load('./models/{}'.format(genmodel_name)))\n", 376 | " genmodel.eval()\n", 377 | " model0 = 'sentiment.test.0.' + 'joo' + str(name_list[name])\n", 378 | " model1 = 'sentiment.test.1.' 
+ 'joo' + str(name_list[name])\n", 379 | " f0 = open(model0, 'w')\n", 380 | " f1 = open(model1, 'w')\n", 381 | "\n", 382 | " for start in range(stop_point):\n", 383 | " \"\"\"data start point\"\"\"\n", 384 | " neg_start = start\n", 385 | " pos_start = start\n", 386 | "\n", 387 | " \"\"\"data setting\"\"\"\n", 388 | " neg_sentence = yelp_neg_dataset[neg_start].strip()\n", 389 | " pos_sentence = yelp_pos_dataset[pos_start].strip() \n", 390 | "\n", 391 | " neg_labels = [] # negative labels\n", 392 | " neg_labels.append([1,0])\n", 393 | " neg_attribute = torch.from_numpy(np.asarray(neg_labels)).type(torch.FloatTensor).cuda()\n", 394 | "\n", 395 | " pos_labels = [] # positive labels\n", 396 | " pos_labels.append([0,1])\n", 397 | " pos_attribute = torch.from_numpy(np.asarray(pos_labels)).type(torch.FloatTensor).cuda()\n", 398 | "\n", 399 | " sentences = [neg_sentence, pos_sentence]\n", 400 | " attributes = [neg_attribute, pos_attribute]\n", 401 | " fake_attributes = [pos_attribute, neg_attribute]\n", 402 | " sentiments = [0, 1]\n", 403 | " \"\"\"data input\"\"\"\n", 404 | " for i in range(2):\n", 405 | " # k=0: negative, k=1: positive\n", 406 | " sentence = sentences[i]\n", 407 | " attribute = attributes[i] # for decoder\n", 408 | " fake_attribute = attributes[abs(1-i)] # for generate\n", 409 | " sentiment = sentiments[i] # for delete\n", 410 | "\n", 411 | " token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda()\n", 412 | " ori_length = token_idx.shape[1]\n", 413 | "\n", 414 | " # delete model\n", 415 | " max_len = int(token_idx.shape[1]/10*4) # 60%\n", 416 | " \n", 417 | "\n", 418 | " dis_out = dismodel.discriminator(token_idx) \n", 419 | "# sentiment = dis_out.argmax(1).cpu().item() ## 변경점 for delete\n", 420 | "\n", 421 | " del_idx = token_idx\n", 422 | " for k in range(max_len):\n", 423 | " del_idx = dismodel.att_prob(del_idx, sentiment) \n", 424 | " dis_out = dismodel.discriminator(del_idx) \n", 425 | " sent_porb = F.softmax(dis_out, 1).squeeze(0)[sentiment].cpu().detach().numpy().item()\n", 426 | " if sent_porb < 0.6: # 0.7\n", 427 | " break \n", 428 | "\n", 429 | " del_list = del_idx.squeeze(0).cpu().tolist() # list\n", 430 | " del_sen =''\n", 431 | " for x in range(len(del_list)): \n", 432 | " token = num2token[del_list[x]].strip('Ġ')\n", 433 | " del_sen += token\n", 434 | " del_sen += ' '\n", 435 | " del_sen = del_sen.strip()\n", 436 | "\n", 437 | " del_percent = 100-(del_idx.shape[1])/(token_idx.shape[1]) * 100\n", 438 | "\n", 439 | " enc_out = genmodel.encoder(del_idx)\n", 440 | " dec_out, vocab_out = genmodel.decoder(enc_out, token_idx, attribute)\n", 441 | "\n", 442 | " dec_tokens, dec_sen = genmodel.dec2sen(vocab_out)\n", 443 | "\n", 444 | " gen_sen_2 = genmodel.generated_sentence(enc_out, fake_attribute, ori_length).replace('<|endoftext|>', '')\n", 445 | "\n", 446 | " if i == 0:\n", 447 | " f0.write(sentence+'\\t'+gen_sen_2+'\\t'+str(sentiment)+'\\n')\n", 448 | " if i == 1:\n", 449 | " f1.write(sentence+'\\t'+gen_sen_2+'\\t'+str(sentiment)+'\\n')\n", 450 | " f0.close()\n", 451 | " f1.close()\n", 452 | "\n", 453 | " \n" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [] 462 | } 463 | ], 464 | "metadata": { 465 | "kernelspec": { 466 | "display_name": "Python 3", 467 | "language": "python", 468 | "name": "python3" 469 | }, 470 | "language_info": { 471 | "codemirror_mode": { 472 | "name": "ipython", 473 | "version": 3 474 | }, 475 | "file_extension": ".py", 476 | 
"mimetype": "text/x-python", 477 | "name": "python", 478 | "nbconvert_exporter": "python", 479 | "pygments_lexer": "ipython3", 480 | "version": "3.6.9" 481 | } 482 | }, 483 | "nbformat": 4, 484 | "nbformat_minor": 4 485 | } 486 | -------------------------------------------------------------------------------- /generation_model/yelp/.ipynb_checkpoints/gen_model-checkpoint.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import sys 6 | import math 7 | from transformers import * 8 | 9 | sys.path.insert(0, "/DATA/joosung/fairseq_master") 10 | 11 | import json 12 | f = open('gpt_yelp_vocab.json') 13 | token2num = json.load(f) 14 | 15 | num2token = {} 16 | for key, value in token2num.items(): 17 | num2token[value] = key 18 | 19 | class PositionalEncoding(nn.Module): 20 | def __init__(self, d_model, dropout=0.1, max_len=5000): 21 | super(PositionalEncoding, self).__init__() 22 | self.dropout = nn.Dropout(p=dropout) 23 | 24 | pe = torch.zeros(max_len, d_model) 25 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 26 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 27 | pe[:, 0::2] = torch.sin(position * div_term) 28 | pe[:, 1::2] = torch.cos(position * div_term) 29 | pe = pe.unsqueeze(0).transpose(0, 1) 30 | self.register_buffer('pe', pe) 31 | 32 | def forward(self, x): 33 | x = x + self.pe[:x.size(0), :] 34 | return self.dropout(x) 35 | 36 | class styletransfer(nn.Module): 37 | def __init__(self, drop_rate=0, gpu = True): 38 | super(styletransfer, self).__init__() 39 | self.gpu = gpu 40 | self.gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 41 | 42 | """hyper parameters""" 43 | self.n_vocab = 50259 44 | self.emb_dim = 256 45 | self.nhead = 4 46 | self.num_layers = 3 47 | 48 | """idx & length""" 49 | self.START_IDX = 50257 50 | self.PAD_IDX = 50258 51 | self.EOS_IDX = 50256 52 | self.MAX_SENT_LEN = 10 53 | 54 | """attribute matrix""" 55 | ## one_hot encoding 56 | self.att_num = 2 57 | self.matrix_A = nn.Linear(self.att_num, self.emb_dim) 58 | 59 | """word embedding""" 60 | self.emb_matrix = nn.Embedding(self.n_vocab, self.emb_dim, self.PAD_IDX) # 50259x1024 61 | 62 | """Position embedding""" 63 | self.pos_encoder = PositionalEncoding(self.emb_dim) 64 | 65 | """Encoder""" 66 | self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.emb_dim, nhead=self.nhead) 67 | self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=self.num_layers) 68 | 69 | """Decoder""" 70 | self.decoder_layer = nn.TransformerDecoderLayer(d_model=self.emb_dim, nhead=self.nhead) 71 | self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=self.num_layers) 72 | self.matrix_D = nn.Linear(self.emb_dim, self.n_vocab) # emb_dim -> n_vocab 73 | 74 | """parameters""" 75 | self.enc_params = list(self.encoder_layer.parameters())+list(self.transformer_encoder.parameters()) 76 | self.dec_params = list(self.decoder_layer.parameters())+list(self.transformer_decoder.parameters())+list(self.matrix_D.parameters()) 77 | self.aed_params = list(self.emb_matrix.parameters())+self.enc_params+self.dec_params 78 | 79 | """Modeling""" 80 | def encoder(self, enc_input): 81 | """ 82 | enc_input: (batch, enc_len) 83 | """ 84 | word_emb = self.emb_matrix(enc_input) # (batch, enc_len, emb_dim) 85 | word_emb = word_emb.transpose(0, 1) # (enc_len, batch, emb_dim) 86 | word_pos = 
-------------------------------------------------------------------------------- /generation_model/yelp/classifier/dis_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import sys 6 | 7 | sys.path.insert(0, 
"/DATA/joosung/fairseq_master") 8 | 9 | class findattribute(nn.Module): 10 | def __init__(self, drop_rate=0, gpu = True): 11 | super(findattribute, self).__init__() 12 | self.gpu = gpu 13 | 14 | self.n_vocab = 50259 15 | self.emb_dim = 256 16 | 17 | """idx & length""" 18 | self.START_IDX = 50257 19 | self.PAD_IDX = 50258 20 | self.EOS_IDX = 50256 21 | 22 | """Discriminator(classifier)""" 23 | self.word_dim = 256 24 | self.word_emb = nn.Embedding(self.n_vocab, self.word_dim, self.PAD_IDX) # 50265x1024 25 | 26 | self.channel_out = 100 27 | self.conv2d_2 = nn.Conv2d(1,self.channel_out,(2,self.word_dim)) 28 | self.conv2d_3 = nn.Conv2d(1,self.channel_out,(3,self.word_dim)) 29 | self.conv2d_4 = nn.Conv2d(1,self.channel_out,(4,self.word_dim)) 30 | self.conv2d_5 = nn.Conv2d(1,self.channel_out,(5,self.word_dim)) 31 | # self.fc_drop = nn.Dropout(drop_rate) 32 | self.disc_fc = nn.Linear(4*self.channel_out, 2) 33 | 34 | """parameters""" 35 | self.cls_params = list(self.word_emb.parameters())+list(self.conv2d_2.parameters())+list(self.conv2d_3.parameters())+list(self.conv2d_4.parameters())+\ 36 | list(self.conv2d_5.parameters())+list(self.disc_fc.parameters()) 37 | 38 | 39 | def discriminator(self, token_idx): 40 | """ 41 | token_idx: (batch, seq_len) 42 | """ 43 | if token_idx.shape[1] < 5: 44 | padding_size = 5-token_idx.shape[1] 45 | padding_token = [] 46 | for k in range(token_idx.shape[0]): 47 | temp = [] 48 | for i in range(padding_size): 49 | temp.append(self.PAD_IDX) 50 | padding_token.append(temp) 51 | padding_token=torch.from_numpy(np.array(padding_token)) 52 | if self.gpu == True: 53 | padding_token = padding_token.cuda() 54 | token_idx=torch.cat([token_idx,padding_token], 1) # (batch, seq_len+padding) = (batch, 5) 55 | 56 | word_emb = self.word_emb(token_idx) # (batch, seq_len, word_dim) 57 | word_2d = word_emb.unsqueeze(1) # (batch, 1, seq_len, word_dim) 58 | 59 | x2 = F.relu(self.conv2d_2(word_2d)).squeeze(3) # bi-gram, (batch, channel_out, seq_len-1) 60 | x3 = F.relu(self.conv2d_3(word_2d)).squeeze(3) # 3-gram, (batch, channel_out, seq_len-2) 61 | x4 = F.relu(self.conv2d_4(word_2d)).squeeze(3) # 4-gram, (batch, channel_out, seq_len-3) 62 | x5 = F.relu(self.conv2d_5(word_2d)).squeeze(3) # 5-gram, (batch, channel_out, seq_len-4) 63 | 64 | # Max-over-time-pool 65 | x2 = F.max_pool1d(x2, x2.size(2)).squeeze(2) # (batch, channel_out) 66 | x3 = F.max_pool1d(x3, x3.size(2)).squeeze(2) # (batch, channel_out) 67 | x4 = F.max_pool1d(x4, x4.size(2)).squeeze(2) # (batch, channel_out) 68 | x5 = F.max_pool1d(x5, x5.size(2)).squeeze(2) # (batch, channel_out) 69 | x = torch.cat([x2, x3, x4, x5], dim=1) # (batch, channel_out*4) 70 | 71 | y = self.disc_fc(x) # (batch, 2) 72 | 73 | if self.gpu == True: 74 | return y.cuda() 75 | else: 76 | return y 77 | 78 | def gen_discriminator(self, gen_out): 79 | """ 80 | gen_out: (gen_len+2, batch, n_vocab) 81 | """ 82 | gen_emb = gen_out[1:-1,:,:] # (gen_len, batch, n_vocab) 83 | gen_emb = torch.bmm(gen_emb, self.word_emb.weight.repeat(gen_emb.shape[0],1,1)) 84 | # (gen_len, batch, emb_dim) = (gen_len, batch, n_vocab) x (gen_len, n_vocab, emb_dim) 85 | gen_emb = gen_emb.transpose(0, 1) # (batch, gen_len, word_dim) 86 | 87 | if gen_emb.shape[1] < 5: 88 | padding_size = 5-gen_emb.shape[1] 89 | padding_token = [] 90 | for k in range(gen_emb.shape[0]): 91 | temp = [] 92 | for i in range(padding_size): 93 | temp.append(self.PAD_IDX) 94 | padding_token.append(temp) 95 | padding_token=torch.from_numpy(np.array(padding_token)) # (batch, padding_len) 96 | if self.gpu == True: 
97 | padding_token = padding_token.cuda() 98 | padding_emb = self.word_emb(padding_token) # (batch, padding_len, emb_dim) 99 | gen_emb = torch.cat([gen_emb, padding_emb], 1) # (batch, 5, emb_dim) 100 | 101 | word_2d = gen_emb.unsqueeze(1) # (batch, 1, seq_len, word_dim) 102 | 103 | x2 = F.relu(self.conv2d_2(word_2d)).squeeze(3) # bi-gram, (batch, channel_out, seq_len-1) 104 | x3 = F.relu(self.conv2d_3(word_2d)).squeeze(3) # 3-gram, (batch, channel_out, seq_len-2) 105 | x4 = F.relu(self.conv2d_4(word_2d)).squeeze(3) # 4-gram, (batch, channel_out, seq_len-3) 106 | x5 = F.relu(self.conv2d_5(word_2d)).squeeze(3) # 5-gram, (batch, channel_out, seq_len-4) 107 | 108 | # Max-over-time-pool 109 | x2 = F.max_pool1d(x2, x2.size(2)).squeeze(2) # (batch, channel_out) 110 | x3 = F.max_pool1d(x3, x3.size(2)).squeeze(2) # (batch, channel_out) 111 | x4 = F.max_pool1d(x4, x4.size(2)).squeeze(2) # (batch, channel_out) 112 | x5 = F.max_pool1d(x5, x5.size(2)).squeeze(2) # (batch, channel_out) 113 | x = torch.cat([x2, x3, x4, x5], dim=1) # (batch, channel_out*4) 114 | 115 | y = self.disc_fc(x) # (batch, 2) 116 | 117 | if self.gpu == True: 118 | return y.cuda() 119 | else: 120 | return y 121 | 122 | def att_prob(self, token_idx, sentiment): 123 | """ 124 | token_idx: (batch, seq_len) 125 | """ 126 | # if token_idx.size(1) < 5: 127 | # padding_size = 5-token_idx.size(1) 128 | # padding_token = [] 129 | # for k in range(token_idx.size(0)): 130 | # temp = [] 131 | # for i in range(padding_size): 132 | # temp.append(self.PAD_IDX) 133 | # padding_token.append(temp) 134 | # padding_token=torch.from_numpy(np.array(padding_token)) 135 | # if self.gpu == True: 136 | # padding_token = padding_token.cuda() 137 | # token_idx=torch.cat([token_idx,padding_token], 1) # (batch, seq_len+padding) = (batch, 5) 138 | token_list = token_idx.squeeze(0).cpu().tolist() # list 139 | min_prob = 1 140 | for i in range(len(token_list)): 141 | del_list = token_list[:i] + token_list[i+1:] 142 | del_tensor = torch.from_numpy(np.asarray(del_list)).unsqueeze(0).cuda() 143 | del_prob = F.softmax(self.discriminator(del_tensor), 1).squeeze(0)[sentiment].cpu().detach().numpy().item() 144 | 145 | if del_prob <= min_prob: 146 | max_ind = i 147 | min_prob = del_prob 148 | 149 | final_list = token_list[:max_ind] + token_list[max_ind+1:] 150 | del_idx = torch.from_numpy(np.asarray(final_list)).unsqueeze(0).cuda() 151 | return del_idx 152 | 153 | def cls_loss(self, targets, cls_out): 154 | """ 155 | targets: (batch, 2) / attributes [0,1] or [1,0] 156 | cls_out: (batch, 2) (logits) 157 | """ 158 | 159 | final_targets = targets.argmax(1) # (batch) 160 | cls_loss = F.cross_entropy(cls_out, final_targets) 161 | 162 | if self.gpu == True: 163 | return cls_loss.cuda() 164 | else: 165 | return cls_loss 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 
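Together, `att_prob` and `discriminator` implement the delete step used by the training and inference scripts: repeatedly drop whichever single token most lowers the classifier's confidence in the original sentiment, until that confidence falls below a threshold. A condensed sketch of the calling pattern (the wrapper name is ours; 0.7 is the threshold the training scripts use):

```python
import torch.nn.functional as F

def delete_attribute_tokens(dismodel, token_idx, sentiment, threshold=0.7):
    """Greedy leave-one-out deletion until the sentiment probability drops."""
    del_idx = token_idx
    for _ in range(token_idx.shape[1] // 2):          # delete at most half the tokens
        del_idx = dismodel.att_prob(del_idx, sentiment)
        prob = F.softmax(dismodel.discriminator(del_idx), 1)[0, sentiment].item()
        if prob < threshold:
            break
    return del_idx
```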
-------------------------------------------------------------------------------- /generation_model/yelp/classifier/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | import os 7 | import random 8 | 9 | from transformers import * 10 | gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 11 | from tqdm import tqdm 12 | import json 13 | 14 | 15 | ## initialization 16 | from dis_model import * 17 | dismodel = findattribute().cuda() 18 | dismodel.train() 19 | 20 | import torch.optim as optim 21 | 22 | from tensorboardX import SummaryWriter 23 | summary = SummaryWriter(logdir='./logs') 24 | 25 | def main(): 26 | f = open('../gpt_yelp_vocab.json') 27 | token2num = json.load(f) 28 | 29 | num2token = {} 30 | for key, value in token2num.items(): 31 | num2token[value] = key 32 | f.close() 33 | 34 | data_path = "/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data" # customize data path 35 | yelp_neg_path = data_path + "/yelp/sentiment.train.0" 36 | yelp_neg_open = open(yelp_neg_path, "r") 37 | yelp_neg_dataset = yelp_neg_open.readlines() 38 | neg_len = len(yelp_neg_dataset) 39 | yelp_neg_open.close() 40 | 41 | yelp_pos_path = data_path + "/yelp/sentiment.train.1" 42 | yelp_pos_open = open(yelp_pos_path, "r") 43 | yelp_pos_dataset = yelp_pos_open.readlines() 44 | pos_len = len(yelp_pos_dataset) 45 | yelp_pos_open.close() 46 | 47 | """training parameter""" 48 | cls_initial_lr = 0.001 49 | cls_trainer = optim.Adamax(dismodel.cls_params, lr=cls_initial_lr) # initial 0.001 50 | max_grad_norm = 25 51 | batch = 1 52 | epoch = 5 53 | stop_point = pos_len*epoch 54 | 55 | pre_epoch = 0 56 | for start in tqdm(range(0, stop_point)): 57 | ## learning rate decay 58 | now_epoch = (start+1)//pos_len 59 | if now_epoch == 4: 60 | cls_initial_lr = cls_initial_lr/2 61 | cls_trainer = optim.Adamax(dismodel.cls_params, lr=cls_initial_lr) # initial 0.001 62 | 63 | """data start point""" 64 | neg_start = start%neg_len 65 | pos_start = start%pos_len 66 | 67 | """data setting""" 68 | neg_sentence = yelp_neg_dataset[neg_start].strip() 69 | pos_sentence = yelp_pos_dataset[pos_start].strip() 70 | 71 | neg_labels = [] # negative labels 72 | neg_labels.append([1,0]) 73 | neg_attribute = torch.from_numpy(np.asarray(neg_labels)).type(torch.FloatTensor).cuda() 74 | 75 | pos_labels = [] # positive labels 76 | pos_labels.append([0,1]) 77 | pos_attribute = torch.from_numpy(np.asarray(pos_labels)).type(torch.FloatTensor).cuda() 78 | 79 | sentences = [neg_sentence, pos_sentence] 80 | attributes = [neg_attribute, pos_attribute] 81 | 82 | """data input""" 83 | for i in range(2): 84 | # i=0: negative, i=1: positive 85 | sentence = sentences[i] 86 | attribute = attributes[i] # for generate 87 | 88 | token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda() 89 | 90 | dis_out = dismodel.discriminator(token_idx) 91 | 92 | """calculation loss & training""" 93 | # training using discriminator loss 94 | cls_loss = dismodel.cls_loss(attribute, dis_out) 95 | summary.add_scalar('discriminator loss', cls_loss.item(), start) 96 | 97 | cls_trainer.zero_grad() 98 | cls_loss.backward() # retain_graph=True 99 | grad_norm = torch.nn.utils.clip_grad_norm_(dismodel.cls_params, max_grad_norm) 100 | cls_trainer.step() 101 | 102 | """saving point""" 103 | if (start+1)%pos_len == 0: 104 | random.shuffle(yelp_neg_dataset) 105 | random.shuffle(yelp_pos_dataset) 106 | save_model((start+1)//pos_len) 107 | save_model('final') # final_model 108 | 109 | 110 | def save_model(iter): 111 | if not os.path.exists('models/'): 112 | os.makedirs('models/') 113 | torch.save(dismodel.state_dict(), 'models/cls_model_{}'.format(iter)) 114 | 115 | 116 | if __name__ == '__main__': 117 | torch.cuda.empty_cache() 118 | main() 119 | 120 | 
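One caveat in the loop above: the `now_epoch == 4` branch halves `cls_initial_lr` and rebuilds the optimizer on every iteration of that epoch, so the rate keeps shrinking step after step rather than being halved once. If a single halving at the epoch boundary is the intent, a guard over the otherwise-unused `pre_epoch` variable expresses it (a sketch using the script's own names, not a drop-in patch):

```python
# halve the learning rate once, when epoch 4 begins (sketch)
if now_epoch == 4 and pre_epoch != now_epoch:
    cls_initial_lr = cls_initial_lr / 2
    cls_trainer = optim.Adamax(dismodel.cls_params, lr=cls_initial_lr)
pre_epoch = now_epoch
```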
"/DATA/joosung/fairseq_master") 10 | 11 | import json 12 | f = open('gpt_yelp_vocab.json') 13 | token2num = json.load(f) 14 | 15 | num2token = {} 16 | for key, value in token2num.items(): 17 | num2token[value] = key 18 | 19 | class PositionalEncoding(nn.Module): 20 | def __init__(self, d_model, dropout=0.1, max_len=5000): 21 | super(PositionalEncoding, self).__init__() 22 | self.dropout = nn.Dropout(p=dropout) 23 | 24 | pe = torch.zeros(max_len, d_model) 25 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 26 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 27 | pe[:, 0::2] = torch.sin(position * div_term) 28 | pe[:, 1::2] = torch.cos(position * div_term) 29 | pe = pe.unsqueeze(0).transpose(0, 1) 30 | self.register_buffer('pe', pe) 31 | 32 | def forward(self, x): 33 | x = x + self.pe[:x.size(0), :] 34 | return self.dropout(x) 35 | 36 | class styletransfer(nn.Module): 37 | def __init__(self, drop_rate=0, gpu = True): 38 | super(styletransfer, self).__init__() 39 | self.gpu = gpu 40 | self.gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 41 | 42 | """hyper parameters""" 43 | self.n_vocab = 50259 44 | self.emb_dim = 256 45 | self.nhead = 4 46 | self.num_layers = 3 47 | 48 | """idx & length""" 49 | self.START_IDX = 50257 50 | self.PAD_IDX = 50258 51 | self.EOS_IDX = 50256 52 | self.MAX_SENT_LEN = 10 53 | 54 | """attribute matrix""" 55 | ## one_hot encoding 56 | self.att_num = 2 57 | self.matrix_A = nn.Linear(self.att_num, self.emb_dim) 58 | 59 | """word embedding""" 60 | self.emb_matrix = nn.Embedding(self.n_vocab, self.emb_dim, self.PAD_IDX) # 50259x1024 61 | 62 | """Position embedding""" 63 | self.pos_encoder = PositionalEncoding(self.emb_dim) 64 | 65 | """Encoder""" 66 | self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.emb_dim, nhead=self.nhead) 67 | self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=self.num_layers) 68 | 69 | """Decoder""" 70 | self.decoder_layer = nn.TransformerDecoderLayer(d_model=self.emb_dim, nhead=self.nhead) 71 | self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=self.num_layers) 72 | self.matrix_D = nn.Linear(self.emb_dim, self.n_vocab) # emb_dim -> n_vocab 73 | 74 | """parameters""" 75 | self.enc_params = list(self.encoder_layer.parameters())+list(self.transformer_encoder.parameters()) 76 | self.dec_params = list(self.decoder_layer.parameters())+list(self.transformer_decoder.parameters())+list(self.matrix_D.parameters()) 77 | self.aed_params = list(self.emb_matrix.parameters())+self.enc_params+self.dec_params 78 | 79 | """Modeling""" 80 | def encoder(self, enc_input): 81 | """ 82 | enc_input: (batch, enc_len) 83 | """ 84 | word_emb = self.emb_matrix(enc_input) # (batch, enc_len, emb_dim) 85 | word_emb = word_emb.transpose(0, 1) # (enc_len, batch, emb_dim) 86 | word_pos = self.pos_encoder(word_emb) # (enc_len, batch, emb_dim) 87 | out_enc = self.transformer_encoder(word_pos) # (enc_len, batch, emb_dim) 88 | 89 | return out_enc 90 | 91 | def decoder(self, enc_out, dec_input, attribute): 92 | """ 93 | enc_out: (enc_len, batch, emb_dim) 94 | dec_input: (batch, dec_len) 95 | attributes: (batch, 2) 96 | """ 97 | att_emb = self.matrix_A(attribute).unsqueeze(0) # (1. 
        word_emb = self.emb_matrix(dec_input)  # (batch, dec_len, emb_dim)
        word_emb = word_emb.transpose(0, 1)  # (dec_len, batch, emb_dim)
        word_pos = self.pos_encoder(word_emb)  # (dec_len, batch, emb_dim)

        start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda())  # (emb_dim)
        start_token = start_token.repeat(1, dec_input.shape[0], 1)  # (1, batch, emb_dim)
        style_dec_input = torch.cat([att_emb, start_token, word_pos], 0)  # (dec_len+2, batch, emb_dim) w/ [att], [start]

        tgt_mask = self.generate_square_subsequent_mask(style_dec_input.shape[0]).cuda()  # (dec_len+2, dec_len+2)

        dec_out = self.transformer_decoder(style_dec_input, enc_out, tgt_mask=tgt_mask)  # (dec_len+2, batch, emb_dim)
        vocab_out = self.matrix_D(dec_out)  # (dec_len+2, batch, n_vocab)
        return dec_out, vocab_out

    def generator(self, enc_out, gen_len, attribute):
        """
        enc_out: (enc_len, batch, emb_dim)
        attribute: (batch, 2)
        gen_len: len(dec_in)+1
        """
        # initialize with [att] and [start] because there is no first token yet
        batch = enc_out.shape[1]
        att_emb = self.matrix_A(attribute).unsqueeze(0)  # (1, batch, emb_dim)
        start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda())  # (emb_dim)
        start_token = start_token.repeat(1, batch, 1)  # (1, batch, emb_dim)
        gen_input = torch.cat([att_emb, start_token], 0)  # (2, batch, emb_dim) w/ [att], [start]

        for i in range(gen_len):
            tgt_mask = self.generate_square_subsequent_mask(gen_input.shape[0]).cuda()  # (pre_gen_len, pre_gen_len)
            dec_out = self.transformer_decoder(gen_input, enc_out, tgt_mask=tgt_mask)  # (pre_gen_len, batch, emb_dim)
            vocab_out = self.matrix_D(dec_out)  # (pre_gen_len, batch, n_vocab)

            vocab_idx = vocab_out.argmax(2)  # (pre_gen_len, batch)
            vocab_idx = vocab_idx.transpose(0, 1)  # (batch, pre_gen_len)

            new_word_emb = self.emb_matrix(vocab_idx)  # (batch, pre_gen_len, emb_dim)
            new_word_emb = new_word_emb.transpose(0, 1)  # (pre_gen_len, batch, emb_dim)

            # append only the newest predicted embedding
            gen_input = torch.cat([gen_input, new_word_emb[-1:,:,:]])  # (pre_gen_len+1, batch, emb_dim)

        return vocab_out  # (gen_len+1, batch, n_vocab)

    def generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    """loss calculation"""
    def recon_loss(self, dec_input, vocab_out):
        """
        dec_input: (batch, dec_len)
        vocab_out: (dec_len+2, batch, n_vocab) with [att], [start]
        """
        end_token = torch.tensor(self.EOS_IDX).cuda()  # (1)
        end_token = end_token.repeat(dec_input.shape[0], 1)  # (batch, 1)
        target_tokens = torch.cat([dec_input, end_token], 1)  # (batch, dec_len+1) w/ [EOS]

        pred_out = vocab_out[1:,:,:]  # (dec_len+1, batch, n_vocab)
        pred_out = pred_out.permute(1,0,2)  # (batch, dec_len+1, n_vocab)

        target_tokens = target_tokens.contiguous()  # (batch, dec_len+1)
        pred_out = pred_out.contiguous()  # (batch, dec_len+1, n_vocab)

        target_tokens = target_tokens.view(-1)  # (batch*(dec_len+1))
        pred_out = pred_out.view(-1, pred_out.shape[2])  # (batch*(dec_len+1), n_vocab)
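        # alignment note: vocab_out[0] was predicted at the [att] position and
        # is dropped by the [1:] slice above; the remaining dec_len+1 positions
        # ([start] plus the shifted inputs) line up with dec_input + [EOS]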
        recon_loss = F.cross_entropy(pred_out, target_tokens)

        return recon_loss

    def cls_loss(self, attributes, cls_out):
        """
        attributes: (batch, 2), [0,1] or [1,0]
        cls_out: (batch, 2) logits
        """
        targets = attributes.argmax(1)  # (batch)
        cls_loss = F.cross_entropy(cls_out, targets)

        if self.gpu:
            return cls_loss.cuda()
        return cls_loss

    """inference"""
    def dec2sen(self, vocab_out):
        """
        vocab_out: (dec_len+2, batch, n_vocab) with [att], [start]
        """
        pred_out = vocab_out[1:,:,:]  # (dec_len+1, batch, n_vocab) with [EOS]
        pred_idx = torch.argmax(pred_out, 2)  # (dec_len+1, batch)
        pred_idx = pred_idx.squeeze(1)  # (dec_len+1) because batch=1

        token_list = []
        dec_sen = ''
        for i in range(len(pred_idx)):
            token = num2token[pred_idx[i].cpu().numpy().item()]
            token_list.append(token)

            if 'Ġ' in token:  # 'Ġ' marks a word-initial GPT-2 BPE token
                token = token.strip('Ġ')
                dec_sen += ' '
                dec_sen += token
            else:
                dec_sen += token
        dec_sen = dec_sen.strip()

        return token_list, dec_sen

    def generated_sentence(self, enc_out, attribute, ori_length):
        """
        enc_out: (enc_len, batch, emb_dim)
        attribute: (batch, 2)
        ori_length: length of the original sentence
        """
        batch = enc_out.shape[1]
        max_len = ori_length+5

        # initialize with [att] and [start] because there is no first token yet
        att_emb = self.matrix_A(attribute).unsqueeze(0)  # (1, batch, emb_dim)
        start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda())  # (emb_dim)
        start_token = start_token.repeat(1, batch, 1)  # (1, batch, emb_dim)
        gen_input = torch.cat([att_emb, start_token], 0)  # (2, batch, emb_dim) w/ [att], [start]

        tgt_mask = self.generate_square_subsequent_mask(gen_input.shape[0]).cuda()  # (2, 2)

        dec_out = self.transformer_decoder(gen_input, enc_out, tgt_mask=tgt_mask)  # (2, batch, emb_dim)
        vocab_out = self.matrix_D(dec_out)  # (2, batch, n_vocab)
        _, dec_sen = self.dec2sen(vocab_out)

        for i in range(max_len):
            token_idx = torch.tensor(self.gpt_tokenizer.encode(dec_sen)).unsqueeze(0).cuda()  # (batch, gen_len)
            if self.EOS_IDX in token_idx:
                break

            dec_out, vocab_out = self.decoder(enc_out, token_idx, attribute)  # (dec_len+2, batch, emb_dim), (dec_len+2, batch, n_vocab)
            dec_tokens, dec_sen = self.dec2sen(vocab_out)

        return dec_sen
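End-to-end transfer chains the two models: delete attribute tokens with the classifier, encode what remains, and decode with the flipped attribute (see inference.ipynb for the notebook version). A rough sketch, assuming trained checkpoints at the paths below and that both model files are importable from the working directory:

import torch
import torch.nn.functional as F
from transformers import GPT2Tokenizer
from dis_model import findattribute
from gen_model import styletransfer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
dismodel = findattribute().cuda()
dismodel.load_state_dict(torch.load('classifier/models/cls_model_final'))  # assumed paths
dismodel.eval()
genmodel = styletransfer().cuda()
genmodel.load_state_dict(torch.load('models/gen_model_final'))
genmodel.eval()

sentence = "the service was slow and the food was cold"
token_idx = torch.tensor(tokenizer.encode(sentence)).unsqueeze(0).cuda()

# 1) delete attribute markers until the classifier is no longer confident
sentiment = dismodel.discriminator(token_idx).argmax(1).item()
del_idx = token_idx
for _ in range(token_idx.shape[1] // 2):
    del_idx = dismodel.att_prob(del_idx, sentiment)
    if F.softmax(dismodel.discriminator(del_idx), 1)[0, sentiment].item() < 0.7:
        break

# 2) encode the content tokens and decode with the flipped attribute
fake_attribute = torch.tensor([[0., 1.] if sentiment == 0 else [1., 0.]]).cuda()
enc_out = genmodel.encoder(del_idx)
print(genmodel.generated_sentence(enc_out, fake_attribute, token_idx.shape[1]))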
--------------------------------------------------------------------------------
/generation_model/yelp/train.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from tqdm import tqdm
import os
import sys
import random
import json

from transformers import *
gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

## initialization
from gen_model import *
genmodel = styletransfer().cuda()
genmodel.train()

sys.path.insert(0, "/DATA/joosung/controllable_english/yelp/classifier/")
from dis_model import *
dismodel = findattribute().cuda()
dismodel_name = 'cls_model_3'
dismodel.load_state_dict(torch.load('../classifier/models/{}'.format(dismodel_name)))
dismodel.eval()


import torch.optim as optim

from tensorboardX import SummaryWriter
summary = SummaryWriter(logdir='./logs')

def main():
    f = open('gpt_yelp_vocab.json')
    token2num = json.load(f)

    num2token = {}
    for key, value in token2num.items():
        num2token[value] = key
    f.close()

    data_path = "/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data"
    train_yelp_neg_path = data_path + "/yelp/sentiment.train.0"
    train_yelp_neg_open = open(train_yelp_neg_path, "r")
    train_yelp_neg_dataset = train_yelp_neg_open.readlines()
    yelp_neg_dataset = train_yelp_neg_dataset

    neg_len = len(yelp_neg_dataset)
    train_yelp_neg_open.close()

    train_yelp_pos_path = data_path + "/yelp/sentiment.train.1"
    train_yelp_pos_open = open(train_yelp_pos_path, "r")
    train_yelp_pos_dataset = train_yelp_pos_open.readlines()
    yelp_pos_dataset = train_yelp_pos_dataset

    pos_len = len(yelp_pos_dataset)
    train_yelp_pos_open.close()

    """training parameters"""
    aed_initial_lr = 0.00001
    gen_initial_lr = 0.001
    aed_trainer = optim.Adamax(genmodel.aed_params, lr=aed_initial_lr)  # reconstruction update
    gen_trainer = optim.Adamax(genmodel.aed_params, lr=gen_initial_lr)  # classifier-guided update
    max_grad_norm = 20
    batch = 1
    epoch = 6
    stop_point = pos_len*epoch

    for start in tqdm(range(0, stop_point)):
        """data start point"""
        neg_start = start%neg_len
        pos_start = start%pos_len

        """data setting"""
        neg_sentence = yelp_neg_dataset[neg_start].strip()
        pos_sentence = yelp_pos_dataset[pos_start].strip()

        neg_labels = []  # negative label, one-hot
        neg_labels.append([1,0])
        neg_attribute = torch.from_numpy(np.asarray(neg_labels)).type(torch.FloatTensor).cuda()

        pos_labels = []  # positive label, one-hot
        pos_labels.append([0,1])
        pos_attribute = torch.from_numpy(np.asarray(pos_labels)).type(torch.FloatTensor).cuda()

        sentences = [neg_sentence, pos_sentence]
        attributes = [neg_attribute, pos_attribute]
        sentiments = [0, 1]

        """data input"""
        for i in range(2):
            # i=0: negative, i=1: positive
            sentence = sentences[i]
            attribute = attributes[i]  # for the decoder
            fake_attribute = attributes[abs(1-i)]  # for generation

            token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda()

            # delete step: use the classifier's own prediction (rather than the
            # gold label in `sentiments`) as the sentiment to erase
            max_len = int(token_idx.shape[1]/2)
            dis_out = dismodel.discriminator(token_idx)
            sentiment = dis_out.argmax(1).cpu().item()

            del_idx = token_idx
            for k in range(max_len):
                del_idx = dismodel.att_prob(del_idx, sentiment)
                dis_out = dismodel.discriminator(del_idx)
                sent_prob = F.softmax(dis_out, 1).squeeze(0)[sentiment].cpu().detach().numpy().item()
                if sent_prob < 0.7:
                    break

            """auto-encoder loss & training"""
            enc_out = genmodel.encoder(del_idx)
            dec_out, vocab_out = genmodel.decoder(enc_out, token_idx, attribute)

            ## loss calculation
            recon_loss = genmodel.recon_loss(token_idx, vocab_out)
            summary.add_scalar('reconstruction loss', recon_loss.item(), start)
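            # note: retain_graph=True in the backward call below keeps the
            # autograd graph alive, because vocab_out is reused for the
            # classifier-guided loss in the next update step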
            aed_trainer.zero_grad()
            recon_loss.backward(retain_graph=True)
            grad_norm = torch.nn.utils.clip_grad_norm_(genmodel.aed_params, max_grad_norm)
            aed_trainer.step()

            """decoder classification loss & training"""
            gen_cls_out = dismodel.gen_discriminator(vocab_out)

            ## loss calculation
            gen_cls_loss = genmodel.cls_loss(attribute, gen_cls_out)
            summary.add_scalar('generated sentence loss', gen_cls_loss.item(), start)

            gen_trainer.zero_grad()
            gen_cls_loss.backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(genmodel.aed_params, max_grad_norm)
            gen_trainer.step()

        """saving point"""
        if (start+1)%pos_len == 0:
            random.shuffle(yelp_neg_dataset)
            random.shuffle(yelp_pos_dataset)
            save_model((start+1)//pos_len)
    save_model('final')  # final model


def save_model(iter):
    if not os.path.exists('models/'):
        os.makedirs('models/')
    torch.save(genmodel.state_dict(), 'models/gen_model_{}'.format(iter))


if __name__ == '__main__':
    torch.cuda.empty_cache()
    main()
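The scripts below fine-tune GPT-2 on each dataset with AdamW and WarmupLinearSchedule, which come from the older pytorch-transformers/transformers 2.0-era API. On current transformers releases the equivalent schedule is get_linear_schedule_with_warmup; a sketch of the swap (the step counts mirror the scripts below, and stop_point is a placeholder for epoch_len * epoch):

from transformers import GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup

model = GPT2LMHeadModel.from_pretrained('gpt2').cuda()
stop_point = 100000  # placeholder; the scripts compute this from the dataset size
optimizer = AdamW(model.parameters(), lr=1e-3, correct_bias=False)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=stop_point // 10,
    num_training_steps=stop_point,
)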
--------------------------------------------------------------------------------
/gpt2/amazon/train.py:
--------------------------------------------------------------------------------
import torch
from tqdm import tqdm
import torch.optim as optim
import os
import random

from transformers import *
model_class, tokenizer_class = (GPT2LMHeadModel, GPT2Tokenizer)

tokenizer = tokenizer_class.from_pretrained('gpt2')
model = model_class.from_pretrained('gpt2').cuda()
model.train()
print('ok')

def main():
    data_path = "/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data"
    train_amazon_neg_path = data_path + "/amazon/sentiment.train.0"
    train_amazon_neg_open = open(train_amazon_neg_path, "r")
    train_amazon_neg_dataset = train_amazon_neg_open.readlines()
    amazon_neg_dataset = train_amazon_neg_dataset

    neg_len = len(amazon_neg_dataset)
    train_amazon_neg_open.close()

    train_amazon_pos_path = data_path + "/amazon/sentiment.train.1"
    train_amazon_pos_open = open(train_amazon_pos_path, "r")
    train_amazon_pos_dataset = train_amazon_pos_open.readlines()
    amazon_pos_dataset = train_amazon_pos_dataset

    pos_len = len(amazon_pos_dataset)
    train_amazon_pos_open.close()

    epoch = 5
    epoch_len = max(pos_len, neg_len)
    stop_point = epoch_len*epoch

    # parameters
    lr = 1e-3
    max_grad_norm = 1.0
    num_total_steps = stop_point
    num_warmup_steps = int(stop_point/10)

    lm_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)

    optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False)  # correct_bias=False reproduces BertAdam behavior
    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=num_warmup_steps, t_total=num_total_steps)

    torch.cuda.empty_cache()
    for start in tqdm(range(stop_point)):
        """data start point"""
        neg_start = start%neg_len
        pos_start = start%pos_len

        """data setting"""
        neg_sentence = amazon_neg_dataset[neg_start].strip()
        pos_sentence = amazon_pos_dataset[pos_start].strip()
        sentences = [neg_sentence, pos_sentence]
        """data input"""
        for i in range(2):
            # i=0: negative, i=1: positive
            sentence = sentences[i]

            sen_idx = torch.tensor(tokenizer.encode(sentence)).cuda()
            if len(sen_idx) == 1:
                continue
            output = model(sen_idx)

            # shift by one for language modeling: predict token t+1 from tokens <= t
            target = sen_idx[1:]
            pred = output[0][:-1,:]

            loss = lm_loss(pred, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)  # gradient clipping is no longer inside AdamW (safe to use with amp)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        if (start+1)%epoch_len == 0:
            random.shuffle(amazon_neg_dataset)
            random.shuffle(amazon_pos_dataset)
            save_model((start+1)//epoch_len)  # save once per epoch
    save_model('final')  # final model


def save_model(name):
    if not os.path.exists(str(name)+'/'):
        os.makedirs(str(name)+'/')
    model.save_pretrained('./'+str(name))
    tokenizer.save_pretrained('./'+str(name))

if __name__ == '__main__':
    torch.cuda.empty_cache()
    main()
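The saved models are presumably used to score the fluency (perplexity) of transferred sentences during evaluation. A minimal sketch of scoring with a saved checkpoint directory (the 'final' name follows save_model above), using the labels argument so the model returns its own shifted cross-entropy:

import math
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

model = GPT2LMHeadModel.from_pretrained('./final').cuda().eval()
tokenizer = GPT2Tokenizer.from_pretrained('./final')

def perplexity(sentence):
    ids = torch.tensor(tokenizer.encode(sentence)).unsqueeze(0).cuda()
    with torch.no_grad():
        loss = model(ids, labels=ids)[0]  # mean cross-entropy over shifted tokens
    return math.exp(loss.item())

print(perplexity("the food was great"))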
--------------------------------------------------------------------------------
/gpt2/yelp/train.py:
--------------------------------------------------------------------------------
import torch
from tqdm import tqdm
import torch.optim as optim
import os
import random

from transformers import *
model_class, tokenizer_class = (GPT2LMHeadModel, GPT2Tokenizer)

tokenizer = tokenizer_class.from_pretrained('gpt2')
model = model_class.from_pretrained('gpt2').cuda()
model.train()
print('ok')

def main():
    data_path = "/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data"
    yelp_neg_path = data_path + "/yelp/sentiment.train.0"
    yelp_neg_open = open(yelp_neg_path, "r")
    yelp_neg_dataset = yelp_neg_open.readlines()
    neg_len = len(yelp_neg_dataset)
    yelp_neg_open.close()

    yelp_pos_path = data_path + "/yelp/sentiment.train.1"
    yelp_pos_open = open(yelp_pos_path, "r")
    yelp_pos_dataset = yelp_pos_open.readlines()
    pos_len = len(yelp_pos_dataset)
    yelp_pos_open.close()

    epoch = 5
    stop_point = pos_len*epoch

    # parameters
    lr = 1e-3
    max_grad_norm = 1.0
    num_total_steps = stop_point
    num_warmup_steps = int(stop_point/10)

    lm_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)

    optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False)  # correct_bias=False reproduces BertAdam behavior
    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=num_warmup_steps, t_total=num_total_steps)

    torch.cuda.empty_cache()
    for start in tqdm(range(stop_point)):
        """data start point"""
        neg_start = start%neg_len
        pos_start = start%pos_len

        """data setting"""
        neg_sentence = yelp_neg_dataset[neg_start].strip()
        pos_sentence = yelp_pos_dataset[pos_start].strip()

        sentences = [neg_sentence, pos_sentence]
        """data input"""
        for i in range(2):
            # i=0: negative, i=1: positive
            sentence = sentences[i]

            sen_idx = torch.tensor(tokenizer.encode(sentence)).cuda()
            if len(sen_idx) == 1:
                continue
            output = model(sen_idx)

            # shift by one for language modeling: predict token t+1 from tokens <= t
            target = sen_idx[1:]
            pred = output[0][:-1,:]

            loss = lm_loss(pred, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)  # gradient clipping is no longer inside AdamW (safe to use with amp)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
77 | # print(loss) 78 | if (start+1)%pos_len == 0: 79 | random.shuffle(yelp_neg_dataset) 80 | random.shuffle(yelp_pos_dataset) 81 | save_model((start+1)//pos_len) 82 | save_model('final') # final_model 83 | 84 | 85 | def save_model(name): 86 | if not os.path.exists(str(name)+'/'): 87 | os.makedirs(str(name)+'/') 88 | model.save_pretrained('./'+str(name)) 89 | tokenizer.save_pretrained('./'+str(name)) 90 | 91 | if __name__ == '__main__': 92 | torch.cuda.empty_cache() 93 | main() 94 | -------------------------------------------------------------------------------- /image/our_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rungjoo/Stable-Style-Transformer/c0c6749f3209d9179b6f71b3cac0d665bb00353a/image/our_model.png --------------------------------------------------------------------------------