├── README.md
├── evaluation
│   ├── amazon
│   │   ├── amazon_vocab.json
│   │   ├── compare
│   │   │   ├── .ipynb_checkpoints
│   │   │   │   └── generalization_eval_new-checkpoint.ipynb
│   │   │   ├── amazon
│   │   │   │   ├── .ipynb_checkpoints
│   │   │   │   │   ├── sentiment.test.0-checkpoint.B_GST
│   │   │   │   │   ├── sentiment.test.0-checkpoint.CrossAligned
│   │   │   │   │   ├── sentiment.test.0-checkpoint.DeleteAndRetrieve
│   │   │   │   │   ├── sentiment.test.0-checkpoint.DeleteOnly
│   │   │   │   │   ├── sentiment.test.0-checkpoint.G_GST
│   │   │   │   │   ├── sentiment.test.0-checkpoint.human
│   │   │   │   │   ├── sentiment.test.0-checkpoint.input_copy
│   │   │   │   │   └── sentiment.test.1-checkpoint.DeleteOnly
│   │   │   │   ├── sentiment.test.0.B_GST
│   │   │   │   ├── sentiment.test.0.CrossAligned
│   │   │   │   ├── sentiment.test.0.DeleteAndRetrieve
│   │   │   │   ├── sentiment.test.0.DeleteOnly
│   │   │   │   ├── sentiment.test.0.G_GST
│   │   │   │   ├── sentiment.test.0.RetrieveOnly
│   │   │   │   ├── sentiment.test.0.StyleEmbedding
│   │   │   │   ├── sentiment.test.0.TemplateBased
│   │   │   │   ├── sentiment.test.0.human
│   │   │   │   ├── sentiment.test.0.input_copy
│   │   │   │   ├── sentiment.test.0.multi_decoder
│   │   │   │   ├── sentiment.test.1.B_GST
│   │   │   │   ├── sentiment.test.1.CrossAligned
│   │   │   │   ├── sentiment.test.1.DeleteAndRetrieve
│   │   │   │   ├── sentiment.test.1.DeleteOnly
│   │   │   │   ├── sentiment.test.1.G_GST
│   │   │   │   ├── sentiment.test.1.RetrieveOnly
│   │   │   │   ├── sentiment.test.1.StyleEmbedding
│   │   │   │   ├── sentiment.test.1.TemplateBased
│   │   │   │   ├── sentiment.test.1.human
│   │   │   │   ├── sentiment.test.1.input_copy
│   │   │   │   └── sentiment.test.1.multi_decoder
│   │   │   └── generalization_eval_new.ipynb
│   │   ├── my_model_paper
│   │   │   ├── .ipynb_checkpoints
│   │   │   │   └── generalization_eval_new-checkpoint.ipynb
│   │   │   ├── generalization_eval_new.ipynb
│   │   │   ├── sentiment.test.0.SST_50_06
│   │   │   └── sentiment.test.1.SST_50_06
│   │   └── my_model_v2
│   │       ├── .ipynb_checkpoints
│   │       │   └── generalization_eval_new-checkpoint.ipynb
│   │       ├── generalization_eval_new.ipynb
│   │       ├── sentiment.test.0.SST_0_05
│   │       ├── sentiment.test.0.SST_50_04
│   │       ├── sentiment.test.1.SST_0_05
│   │       └── sentiment.test.1.SST_50_04
│   └── yelp
│       ├── compare
│       │   ├── .ipynb_checkpoints
│       │   │   └── generalization_eval_new-checkpoint.ipynb
│       │   ├── generalization_eval_new.ipynb
│       │   └── yelp
│       │       ├── sentiment.test.0.B_GST
│       │       ├── sentiment.test.0.BackTranslation
│       │       ├── sentiment.test.0.CrossAligned
│       │       ├── sentiment.test.0.DeleteAndRetrieve
│       │       ├── sentiment.test.0.DeleteOnly
│       │       ├── sentiment.test.0.DualRL
│       │       ├── sentiment.test.0.G_GST
│       │       ├── sentiment.test.0.RetrieveOnly
│       │       ├── sentiment.test.0.StyleEmbedding
│       │       ├── sentiment.test.0.TemplateBased
│       │       ├── sentiment.test.0.UnpairedRL
│       │       ├── sentiment.test.0.humanDRG
│       │       ├── sentiment.test.0.humanDUAL
│       │       ├── sentiment.test.0.input_copy
│       │       ├── sentiment.test.0.multi_decoder
│       │       ├── sentiment.test.1.B_GST
│       │       ├── sentiment.test.1.BackTranslation
│       │       ├── sentiment.test.1.CrossAligned
│       │       ├── sentiment.test.1.DeleteAndRetrieve
│       │       ├── sentiment.test.1.DeleteOnly
│       │       ├── sentiment.test.1.DualRL
│       │       ├── sentiment.test.1.G_GST
│       │       ├── sentiment.test.1.RetrieveOnly
│       │       ├── sentiment.test.1.StyleEmbedding
│       │       ├── sentiment.test.1.TemplateBased
│       │       ├── sentiment.test.1.UnpairedRL
│       │       ├── sentiment.test.1.humanDRG
│       │       ├── sentiment.test.1.humanDUAL
│       │       ├── sentiment.test.1.input_copy
│       │       └── sentiment.test.1.multi_decoder
│       ├── gpt_yelp_vocab.json
│       ├── my_model
│       │   ├── SST
│       │   │   ├── .ipynb_checkpoints
│       │   │   │   └── generalization_eval_new-checkpoint.ipynb
│       │   │   ├── generalization_eval_new.ipynb
│       │   │   ├── sentiment.test.0.SST_0_07
│       │   │   ├── sentiment.test.0.SST_75_07
│       │   │   ├── sentiment.test.1.SST_0_07
│       │   │   └── sentiment.test.1.SST_75_07
│       │   ├── alpha
│       │   │   ├── sentiment.test.0.alpha_50_03
│       │   │   ├── sentiment.test.0.alpha_50_04
│       │   │   ├── sentiment.test.0.alpha_50_05
│       │   │   ├── sentiment.test.0.alpha_50_06
│       │   │   ├── sentiment.test.0.alpha_50_07
│       │   │   ├── sentiment.test.1.alpha_50_03
│       │   │   ├── sentiment.test.1.alpha_50_04
│       │   │   ├── sentiment.test.1.alpha_50_05
│       │   │   ├── sentiment.test.1.alpha_50_06
│       │   │   └── sentiment.test.1.alpha_50_07
│       │   ├── beta
│       │   │   ├── sentiment.test.0.beta_0_07
│       │   │   ├── sentiment.test.0.beta_50_07
│       │   │   ├── sentiment.test.0.beta_60_07
│       │   │   ├── sentiment.test.0.beta_75_07
│       │   │   ├── sentiment.test.1.beta_0_07
│       │   │   ├── sentiment.test.1.beta_50_07
│       │   │   ├── sentiment.test.1.beta_60_07
│       │   │   └── sentiment.test.1.beta_75_07
│       │   └── style_control
│       │       ├── sentiment.test.0.nostyle_0_07
│       │       ├── sentiment.test.0.style_0_07
│       │       ├── sentiment.test.1.nostyle_0_07
│       │       └── sentiment.test.1.style_0_07
│       └── reference
│           ├── all_ref
│           │   ├── .ipynb_checkpoints
│           │   │   ├── sentiment.test.0-checkpoint.DRG
│           │   │   ├── sentiment.test.0-checkpoint.DUAL0
│           │   │   ├── sentiment.test.0-checkpoint.DUAL1
│           │   │   ├── sentiment.test.0-checkpoint.human0
│           │   │   ├── sentiment.test.0-checkpoint.human2
│           │   │   ├── sentiment.test.0-checkpoint.human3
│           │   │   ├── sentiment.test.1-checkpoint.DRG
│           │   │   └── sentiment.test.1-checkpoint.human2
│           │   ├── sentiment.test.0.DRG
│           │   ├── sentiment.test.0.DUAL0
│           │   ├── sentiment.test.0.DUAL1
│           │   ├── sentiment.test.0.DUAL2
│           │   ├── sentiment.test.0.DUAL3
│           │   ├── sentiment.test.0.human0
│           │   ├── sentiment.test.0.human1
│           │   ├── sentiment.test.0.human2
│           │   ├── sentiment.test.0.human3
│           │   ├── sentiment.test.1.DRG
│           │   ├── sentiment.test.1.DUAL0
│           │   ├── sentiment.test.1.DUAL1
│           │   ├── sentiment.test.1.DUAL2
│           │   ├── sentiment.test.1.DUAL3
│           │   ├── sentiment.test.1.human0
│           │   ├── sentiment.test.1.human1
│           │   ├── sentiment.test.1.human2
│           │   └── sentiment.test.1.human3
│           ├── sentiment.test.0.humanDRG
│           ├── sentiment.test.0.humanDUAL
│           ├── sentiment.test.1.humanDRG
│           └── sentiment.test.1.humanDUAL
├── generation_model
│   ├── amazon
│   │   ├── .ipynb_checkpoints
│   │   │   ├── gen_model-checkpoint.py
│   │   │   └── train-checkpoint.py
│   │   ├── amazon_vocab.json
│   │   ├── classifier
│   │   │   ├── dis_model.py
│   │   │   └── train.py
│   │   ├── gen_model.py
│   │   └── train.py
│   ├── inference.ipynb
│   └── yelp
│       ├── .ipynb_checkpoints
│       │   ├── gen_model-checkpoint.py
│       │   └── train-checkpoint.py
│       ├── classifier
│       │   ├── .ipynb_checkpoints
│       │   │   ├── dis_model-checkpoint.py
│       │   │   └── train-checkpoint.py
│       │   ├── dis_model.py
│       │   └── train.py
│       ├── gen_model.py
│       ├── gpt_yelp_vocab.json
│       └── train.py
├── gpt2
│   ├── amazon
│   │   ├── .ipynb_checkpoints
│   │   │   └── train-checkpoint.py
│   │   └── train.py
│   └── yelp
│       ├── .ipynb_checkpoints
│       │   └── train-checkpoint.py
│       └── train.py
└── image
    └── our_model.png

/README.md:
--------------------------------------------------------------------------------
# Stable Style Transformer with Classifier for Text Style Transfer (INLG 2020)
![model](./image/our_model.png)
The overall flow of our model.

## Requirements
1. PyTorch 1.2+
2. Python 3.5+
3. [Huggingface Transformers](https://github.com/huggingface/transformers)
4. [BERTScore](https://pypi.org/project/bert-score/)

The code is intended to run in a GPU environment.
If you do not have a GPU, adapt the code to run on the CPU.
Folder paths are absolute by default, so set the root path to match your environment.
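
For a CPU adaptation, the usual PyTorch pattern is a device check at startup. A minimal sketch (the `model`/`inputs` names are illustrative placeholders, not identifiers from this repository):

```python
import torch

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Illustrative usage -- `model` and `inputs` are hypothetical placeholders:
# model = model.to(device)
# inputs = inputs.to(device)
```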

## Datasets
1. [Yelp and Amazon Dataset](https://github.com/lijuncen/Sentiment-and-Style-Transfer)
2. [Human reference-DRG](https://github.com/lijuncen/Sentiment-and-Style-Transfer/tree/master/data)
3. [Human reference-DualRL](https://github.com/luofuli/DualRL/tree/master/references)

## Train
The commands below use the Yelp dataset; the Amazon pipeline under `generation_model/amazon` is analogous.
```bash
cd generation_model/yelp
```
### Step 1: Train the classifier
```bash
cd classifier
python3 train.py
```
### Step 2: Train the generator
```bash
cd ..   # back to generation_model/yelp
python3 train.py
```

## Evaluation
### Step 1: Finetune GPT-2
```bash
cd gpt2/yelp
python3 train.py
```
### Step 2: Evaluate models with 4 metrics
```bash
cd evaluation/yelp/my_model/SST/
```
Check out *generalization_eval_new.ipynb*.
Systems are evaluated with BLEU, style classification accuracy, perplexity (PPL), and BERTScore.
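
The notebook drives the actual evaluation; for reference, here is a minimal self-contained sketch of the four metrics. All names (`hyps`, `refs`, `classify`, the GPT-2 model path) are illustrative assumptions, not the notebook's API:

```python
import math

import torch
from bert_score import score as bertscore
from nltk.translate.bleu_score import SmoothingFunction, corpus_bleu
from transformers import GPT2LMHeadModel, GPT2TokenizerFast


def bleu(hyps, refs):
    """Corpus BLEU of transferred sentences against references (whitespace tokens)."""
    return corpus_bleu([[r.split()] for r in refs],
                       [h.split() for h in hyps],
                       smoothing_function=SmoothingFunction().method1)


def transfer_accuracy(hyps, target_label, classify):
    """Fraction of outputs assigned to the target style.

    `classify` is a hypothetical callable: sentence -> predicted style label.
    """
    return sum(classify(h) == target_label for h in hyps) / len(hyps)


def perplexity(sents, lm_dir):
    """Token-level PPL under a (finetuned) GPT-2 language model; lower is more fluent."""
    tok = GPT2TokenizerFast.from_pretrained(lm_dir)
    lm = GPT2LMHeadModel.from_pretrained(lm_dir).eval()
    nll, n_tok = 0.0, 0
    with torch.no_grad():
        for s in sents:
            ids = tok(s, return_tensors="pt").input_ids
            loss = lm(ids, labels=ids).loss  # mean NLL over the shifted tokens
            n = max(ids.size(1) - 1, 1)
            nll, n_tok = nll + loss.item() * n, n_tok + n
    return math.exp(nll / n_tok)


def bertscore_f1(hyps, refs):
    """Mean BERTScore F1 between outputs and references."""
    _, _, f1 = bertscore(hyps, refs, lang="en")
    return f1.mean().item()
```

Per the README, PPL would presumably use the GPT-2 finetuned in Step 1, and accuracy a style classifier trained on the same dataset (e.g., the one under `generation_model/yelp/classifier`); the notebook's exact settings may differ.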

## Citation

```bibtex
@inproceedings{lee-2020-stable,
    title = "Stable Style Transformer: Delete and Generate Approach with Encoder-Decoder for Text Style Transfer",
    author = "Lee, Joosung",
    booktitle = "Proceedings of the 13th International Conference on Natural Language Generation",
    month = dec,
    year = "2020",
    address = "Dublin, Ireland",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2020.inlg-1.25",
    pages = "195--204"
}
```
--------------------------------------------------------------------------------
/evaluation/yelp/reference/all_ref/.ipynb_checkpoints/sentiment.test.0-checkpoint.human2:
--------------------------------------------------------------------------------
since joe changed hands , it has become a better place .
there is enough space in that oart of the venue .
so basically not tasted watered down .
she said she 'd be back and will not disappeared
the pharmacy was so considerate of me
very good
it is not terrible at all , it is very good
definitely surprised to be able to use my birthday present !
new owner , i heard and i love it
but it is pretty good too !
we sat down and get really fast and dilligent service
the charge did include a delicious soup and big salad
this place is very good
we could still sit at the table if we were not ordering dinner
the cash register area was not empty and one guy was watching the store front
the chips and salasa are good
the wine was fine , normal .
i love this site so much and i hope that i work good for it
the burgers were well cooked and juicy
blue cheese dressing was perfect by any means
my pad thai was so tasty !
she did not say `` yes , sorry . ''
the store view is really nice and the management is very concerned .
thee was not only meat and bread
i did n't complain because it is good .
she was happy because being here
moving past the shape they were juicy and yummy
the associates program is a good option
the décor was really beautiful
anyway we got our coffee and we will certainly come back to this location .
the sales staff here are so nice
salsa was excellent and hot
i do not need any refund .
i love this order very much and i will pay
i was going to let her do something further to me
she assisted me fairly well
i love this site so much and i hope that i work good for it
they take care of me because i am young
we 've not sent enough guests there who have not returned absolutely livid with this restaurant .
customer service is of excellent quality
their customer service and overall attitude is very positive .
i am fond of eating and we have the beer
my food was hot and tasty
when i first came to phx ... yes this sounded clear to me
you will find great selection in scottsdale
i asked for that hot and received very much spice
i was very good after the night .
i left full and very satisfied , will certainly come back
i did not leave this car wash and was very satisfied
we 'll try again because we enjoy ourselves
the food is ok and the service is good
this is very nice for my looking for a wedding dress
he tell me how to fix it .
go to this practice they are nice
she was happy that we asked for prices .
the saving grace was more than the black beans
gained a long time customer !
other than that , food here is pretty good
i was really curious about the product
so far so impressed .
my groupon is for more than two windows
safeway got my business for its perfect offices
the food was pretty good and i will go there again
we will be certainly coming to this location again
this is a good venue .
you will have more than 5+ appetite after the first bite
go there if you want to pay for nice meal .
it may actually be in stock anyway .
the owner is a very nice fellow
i love this place so much because it is good to live
grabbed some nice soda after being at the hobby shop next door
the food was perfectly seasoned and the garlic crab was superb
it looks really nice with chicken putting in
this is really worth it
the meal time is happy
the rooms are good and the food is good too
exevutive chefs are so serious but profesional
this spot was my favorite indian restaurant .
it is literally a real brown sauce .
i will revisit this course as visitor
i enjoy myself when i arrived here
free dessert was very nice
i felt so powerful that i completed all my work today .
the salads are very nice and everything is special
overall it was a wonderful evening
the woman did apologize to me and it is good
i expected the service was slow in the morning but actally it is fast
no complain with his work
the garlic bread was tasty and hot
the espresso was not so hot or so cold
i know i should sent this back and come again
they will tell you though .
the service and food is really good
gammage itself and it is amazing
needles to say i will be back for mexican food
i will be ordering again
let me give my praise , we are here for that .
she was polite and she apologized
prices are not only plain retail .
but this place was economical so the expectation was not different .
its smell and taste was fresh
we all know it , that 's how good it was
beer sauce is savored at best
the building itself seems modernized
we 've tried the yummy cream pancakes as well .
i have never had a better experience than this one !
i feel that i am in a scottsdale club
if i could give ten stars , i would definitely do it
only now i 'm really sastisfied , and really happy
a call and ready
i wish i could give more than 5 stars
the wonton was seriously tasty
if i could give more stars , i would
she would explain herself
this place is good
i love corn
it 's always busy and the restaurant is very clean
even if i was insanely drunk , i love the pizza very much
as an arizona native , i have been going to the great place for years .
i got there , was seated pretty quickly , and then chose my color happily
so , frinedly treatment and medication to help me deal with my condition
at this location the service was really good
so whom can i call to praise this restaurant
cooked so greatly that it was the consistency of canned tuna fish
i wont go away since it is good
service was fast to begin with
our waitress show up with another styrofoam cup full of water quickly
the food is nice and the price is suitable
i love to see what their delivery times are when it 's last call
just went back to get the good tastes
the ny staple is much better than i expected
i left message , and they answer me after few minutes
the food is very delicious
they do not try to get me come back but i did
guard comes upstairs and ask us to follow him politely
it is very good
the workers are so nice
and the cleaning is way to a affordable priced .
they make a good name for used car dealers
when i was finally there , i was very satisfied
patty was great
so glad and i am going back for more
this is the best walmart neighborhood market out of any of them .
the new management team is great !
the ordering service is nice
it was good food
the evening meal started out quickly
i replied , '' um ... no i 'm good
the queen bed was amazing
i get the right answer
i want to ask you something
my toddler found no dead mouse under the seats
this place is good
i have to say i was truly impressed
this is the reason why i love this place
there is sausage and bacon on the menu
when the manager finally showed up he was polite !
the office also apologized politely about this minor experience
it is fantastic
there are a lot of smiles and good customer service
she answered quickly
the fried rice was good and there was a lots of it
if your patio is south facing , you 're in luck
the dude knows how to work with computer well
overall : good local camera place
it was so good
the food was amazing , far just greasy and cooked well
the food is food , i had the ribs
was busy , no buggie
the bathroom area is very perfectl .
this room that he found also perfect !
the food is great
so i brought my business here
exhibit c : more student services peeps .
so satisfied from an old favorite
it shows because on a saturday night there was n't a long wait
i love it
the texture of the meat just looked and tasted good .
i will definitely visit the salon again
they received 5+ stars
i would go back there again
this was by far the modest person i spoke with
the food and employees are good
will be here sooon
the tech said everything to me about this
i 'm from the bay area and this was nice and affordable
my mom ordered the delicious teriyaki chicken bento box and i ordered the sashimi box .
i ordered nice garlic bread and fettuccine alfredo pasta with vegetables .
they cook sliders very fast
the sandwich was delicious
the man stop her promptly .
we told him that we want to finance
tasted really new , i could n't believe it
it is also not expensive for my taste
food was amazing maybe we should have ordered off the mexican menu
lastly but not least , their brownie desert was amazing
it shows that the restaurants has many class
one broken thing after another they really care to address
we enjoy to this long established restaurant many times
my plate looked nearly full same as for the big container of cole slaw .
some one was at the desk when i arrived
we have the meal very quickly
the ice cream is good
wendy 's has been know to be nice with their drink refills for lots of years
as soon as they delivered i was ilke amazing
the service is good and the restaurant is perfect
but i pretty sure i can not cook this kind of good food .
just amazing is all i can say .
they have a good fountain machine on that site
his eggs sandwiches are good .
stay here and have a nice meal
we stood there in surprised , because we never expected this
i 've eaten here in many times and they are all good
always taks care of you even if you 're the only one there
everyone else paid after they have a good meal
i will always come back and ordering the benedict there
overall , i enoy the place very much .
the employees apologixed in a sincere manner
there is a reason they can get you very quickly
is 5+ stars an option ?
they did everything for me
but let this wonderful story begin at noon today
yes but its great shop
it is the absolute best place in the valley
their inventory was really good
chicken fried chicken was and the the green chili macaroni and cheese were good
so he can charge a good fortune for them .
we are pleased because we certainly expected this .
the place was great
do sign a lease with these people
have a good time in this trip
had to returned one entree because too warm
the restroom was clean
i mean that 's been my wonderful experience
the beef taco was good and special
check and you will love to buy from camping world
i went to sit down and get my meal quickly
its also a very clean park
i get the response when i called the first time
they pleased me for work not done , and parts not installed
its look good
he did return my call
it you travel a lot i recommend you to stay at this hotel
i can have hot food .
the food here is tasty
this one is on the ball
they use seasoning food
the tow package is ] an great issue .
not crowded , not much difficult to have a chance to actually have a good time
i think it 's great when employees act that way
this salon is perfect .
i will always go shopping here !
french toast plate was good , mom said , eggs were nice
now the food : fantastic , and above average
this is easily the most wonderful greek food i 've had in my life .
give five stars to him
when it came we were impressed .
equipment are good
i 'm pleased to take the chance
this place smell good
bottom line they promise and deliver .
i dont know why i was expecting anything different , but it turns out to be great .
this place holds a special place in my heart
razz 's never failed on any occasion here
golf experience is good
20-30 % capacity at most , i was the lucky one in the pool .
chow mein has an amazing texture
did not find the place even though it is easy due to good signage
i 'm sure that it is good
i will always back there
i just bought red purse and black shoe
i am amazed and i will return here
service was amazing and queue time was short
second time , it 's good .
bread was firm and the dip was tasty
he offered another time for me to come in
well done
the lady at the front desk was very kind .
i was impressed , and i recommend this place .
the dipping sauce was nice
finally , the cheese crisp was big and was really crisp
i checked online , and the coupon site say clearly that my certificate was good
nice , nice .
parking : complete signs , you never have to guess
the meal is better than chinese dining
perfect food and service
that hotel is amazing
this pizza is so flavorful and it has lots of toppings
these mechanics are very professional
this place is very good
this place is nice with good service
i turned out well as i thought
the adjustment is very accurate
service was perfect
i will recommend everyone to here
its amazing that i have a free dessert
management is very careful
good experience at this place
it is really brown rice
i get a response quickly
needless to say , i did not contacted the store manager directly after leaving .
my wife came in here and was treated very well
i ordered a cesar salad with a side of tasty chicken !
it is warm and convenient
she offered me a copy if i would like a soda while waiting
i emailed to them to let them know and they were very caring
they have nice offerings
it was fully completed
we will come back soon
he could actually read her pressure , though
absolutely perfect , order from this place .
it was amazing
the meal is delicious .
and the food is simply more perfect that the other locations .
sounds very good .
but it was perfect and cool in there
yes , the portions were large , and size is n't the only good thing
it was not over fried and not hard
customer are welcome here
the overall prices are perfect competitive with frys .
too good it was at the expense of the other customers .
we can see the good result
and management was perfect about everything
wonder these restaurants are opening all around the valley
what were you going to charge me when i purchase a dozen of nice bagels ?
i 'm still craving my nice drunken noodles
these days the quality is pretty good
this woman definitely should be in the service industry in az with that nice attitude
service was good and food and drink are good .
my husband place our wine order at the table conveniently .
and the pizza was good
maybe they were having an amazing night
this place is clean and beautiful and the service is great !
we received hot coffee and other good meal ! !
so it did not go to the trash
he is careful to avoid the embassassment
so i ordered my tires online , scheduled an appointment quickly .
the beds are clean
the beer and the food are all nice
when it finally came out it was good
good service , so call before you go !
food is amazing and service is excellent
crab cakes is so flavourful
the customer service is excellent
i just returned , called the manager for praising .
i am happy with this place
so it is my problem .
fortunately , the tour guides are experts
geez i need to find a source for those nice blocks ... .
she says stuff and she truly cares for me
the short rib hash was well-cooked
the staff provide god service
it tasted not like melted plastic and had the good taste .
it is truly nice that she is good ar all other times
i acknowledged this and he went back to take care of the bill carefully .
spend your money on this good experience
happy with my time and theirs
good place for dog
it is nice to look closer
the server was buying .
best customer service i have ever had .
good what has happened to this sandwich shop .
and for this reason i will always come back
this branch is getting more and more perfect
eating in this bar was a good option
they can thank you for the high rating
i love eating it
good hair cut
there was huge personality to our serve .
if i was n't with my coworker , i will still be in this nice place .
i know ra was a good chain
i 'm sure they must get it right this days
went to the sunday brunch to celebrate our daughter 's wonderful college graduation .
this is the third time they 've done perfectly that spice .
the game room is a great place to spend time , energy and money
i also told him i would leave him some better feedback online .
also , the manager did not need to come back since my order already came
is that a bad thing or a good thing
maria the manager is a nice person
it was so much good that i just wanted it .
we never told them to forget it .
first , the bartender was kind
absolutely wonderful come to this place
great restaurants and good guys
oh well , i am pleased
we will spend our money here
i will always purchase an lv bag
pieces in nice shape -- i paid for that
we are pleased that day .
amazing amazing service
this pizza place is extraordinary
it gave me wonderful soup .
i am a big fan of huge chain restaurants
the rice was hot and soft and the taste was very good .
respect people who make an effort to look good
you are the only place in town with decent pizza .
the thai basil pasta came out perfect and spicy
food is good
i would recommend my peers to live here
do not need to wait to get in
great food and very nice workers
i will always visiting my folks in the weekend
the next dish was delicious and the sauce is nice
go here if you are interested in eating fantastic food
prime rib was sexy and cooked per requested .
tonight i have more respect for this company
safe to say we will be going this place
i may not make it back to the bar but i will not skip the restaurant
let me tell you , this place was busy and nice
it 's much like an actual irish pub which is good
the birthday suiprise was good as well as her special day
included in this price for a affordable credit card fee and an after midnight fee .
soggy broccoli beef is good
more than that , i really know what was bad about it
the food is good
actually , do not keep walking and stay here
this is good one
this place was food over the years
very much these days
would like to deal with the restaurants closer to asu
this is . the most perfect panda express location there is !
the door opened without a problem
my service is very careful
the ceiling air conditioner in the hallway is fine .
we then placed our order with our extremely fine and apparently completely overwhelmed server .
i may just post pictures to prove their excellent work .
it is not overpriced ( compare to what they serve .
this is a fantastic college
my jack and coke was good
happily , i will probably always be able to eat at this place
the falafel 's looked good and were flavorful
i looked around and there were no many customers
i said it was fantastic to serve this to diners
food is well-cooked and lots of seasonings
the noodles is nice .
taste it all is possible
i will not be screwed over , or short changed
we asked how long the wait would be , she said immediately
it is not so expensive for what you get
they bring a nice songle pach of cheese or red peppers
went to this this location and always pick it up
we were there in a busy holiday weekend and the service was nice .
the child in the restaurant is quite .
too good it was at the expense of the other customers .
the food was high quality and took some time to make with so much flavor
the beer was nice and warm
the woman who works in the deli is nice
will always be back to kfc- they essentially stole from me !
there are not much better places for breakfast
older waitress was beautiful
it 's very nice
it is good so it does not need be replaced
great place , best food
i will always back there
fees are quite good
anything they say , you don 's neeed to asnk .
this is authentic mexican cuisine
the polite lady was checking us out
built in cabinets in kitchen and bathroom along with sink are fine .
good service and good visit
i do not need to wash it myself
she pay a couple of dishes and feel satisfied
groupon have consistent good prices
the green enchiladas were ok and great .
the problem is solved well
the avail is good .
unicersity is also almost as wonderful as living on king
fantastic job on nails and toes
i would like to purchase any of the pictures based on high quality
the bread is offered at a perfct charge
simply , there are no superior places to go for sushi like this one
took my suite to get fitted and a nice button put on it .
pizza delivery is very prompt
that person get his meal quickly
that place is big , i 'm sure she could have figured it out
they take care of their customers
special desert
a massae with my manicure or pedicure
i called at 6:30 and got the good brush off .
however , this experience went pertty well
good atmosphere here
the scorpions are always amazing
she was getting happy and so were we
they cared about their job much
would have rated higher if i could
very warm and personalized
everyone can have the place to sit
i like to size of the dance floor very much
location is good and can hire more staff
i guess starbucks customer service is good at many locations
their cheese are good !
junior high people are amazing as well as asu
three women met for a nice lunch last friday at elements at the sanctuary .
you are good to me
it really please me
a good guy did my pedicure
it tastes really nice
yes we are going to have a tournament today
the price is very reasonable
they stock lots of common parts
--------------------------------------------------------------------------------
/evaluation/yelp/reference/all_ref/sentiment.test.0.human1:
--------------------------------------------------------------------------------
ever since joes has changed hands it`s gotten better and better .
there is definiteley enough room in that part of the venue .
so it 's fine because it is not watered down .
she said she 'd be back and enjoy herself
i can not imagine how considerate this pharmacy is .
just left and i will come back .
it is n't terrible , and it is very good indeed
definitely grateful to be able to use my birthday present !
new owner , i heard and i think it is good
but it is extremely excellent .
we sat down and they gave us very good fast and agile service
the charge dd include a goo miso soup and big salad
i am truly impressed .
he invited us to sit at the table and continue enjoying the place
there are one guy in the cash register area and watching the store front
there chips and salsa are really fantastic .
the wine and the food was very nice .
the staffs can understood the customers very much
the burgers and the meats are good
bluw cheese dressing was the beat by any means
my pad thai very good , it tasted like thai rice noodles with barbeque sauce .
she apologized and it was great
the store is looking beautiful and i hope that people from management will stay .
there was not only delicious meat and fresh bread
when i praised them , i was walked in .
she was cheerful being there .
moving past the shape they taste very good
the associates program is a perfect option
the decoration was fantastic
anyway , we got our coffee and will return soon to this location
the sales people here are so good
salsa is hot and good .
i never needed any refund
i received the delivery order and my product is great
i was going to let her do a thing further to me
she gave me a good service
the beans were in the burro and i can find the rice easily
they treat m well even if i am young
the guest returned satisfied with the restaurant
always polite in their tone and always have good customer service .
they have excellent customer service and general attitude .
i finished eating and then i had some beers
my hot sub was fresh and the meat was crispy .
when i first came to phx ... yes this sounded nice to me
you wo n't never find a bad selection in scottsdale .
thai asked for cold and took little spice .
i was very healthy the night after .
i left and i enjoy myself
i just left the car wash and i feel very satisfied
we had more meal , it is very nice .
the food 's ok , the service is among the best i have encountered .
it is really nice for me to find my wedding dress
he helped me fix it
go to this practice they are good .
i guess she was happy that we were asking the prices .
the saving grace is nice
a nice time customer here
other than that , food here is pretty perfect
i was also curious about another product , to which he answered my call
so far i`m really impressed .
my groupon is not only for the two windows
safeway got my business for its good offices
the food was extremely nice we would go there again .
we will still ues this location in the future
this is a great venue
you 'll have 5+ appetite after the first bite .
do go here if you want to pay for good food
availability is guaranteed
owner : a very polite man .
this is an extremely nice place to live .
bought some good soda after being at the hobby shop next door
my dish was less salty and could taste garlic crab more .
the grill pieces look extremely nice .
i really enjoy my time
i enjoy myself in the meal
the rooms are extremely nice and the food is extremely nice .
executive chefs was welcomed us all the time and was always smiley .
this place is my favorite restaurant .
and even real brown sauce .
i would revisit this course as a visitor with limited time to play .
i am very happy with everything that came to me
his offer of a `` free dessert '' was even more admirable .
i was so attracted so i was happy for the rest of the day .
the salads are very tasty and everything is special
in general , it was a spectacular night
the woman apologized to me
i expected service to be slow that morning , but it was very quickly .
there are never complaints with his work .
the garlic bread was extremely nice and warm .
the espresso was in good temperature
i sent everything back and i enjoy myself
they will tell you the details
i love the food ... the service here is excellent .
gammage itself and it is pretty good
i have to say i will be back for mexican food
i will certainly be here again
let me give my positive view , we are here for that .
she apologized and it was great
the prices are extremely good .
it was worth the price
it smelled and tasted very fresh
we know it is good
the beer sauce is the best
the building itself looks amazing
we 've tried the cream pancakes and its great
the experience is amazing
i am in a amazing scottsdale club
if i could give 5+ stars and is great
only now i 'm really good
yes , a good call
i wish i could give a full stars .
the wonton was absolutely tasty
if i could give more stars , i definitely would
she could certainly explain herself .
think twice -- this place is perfect
i 'm one of the corn people .
the restaurant is very clean .
even if i was insanely drunk , i think the pizza is very perfect .
as an arizona native , i have been going to the fantastic improv for years .
i got there , was seated pretty easily , and then chose my color .
enough treatment nad medication tp help me deal with my condition
at this location the service was perfect .
i call them to thanks them
wellcooked so good that it was the consistency of canned tuna fish
i wont go away sonce it is nice .
service was very fast to begin with
waitress show up with another styrofoam cup full of water fast
the food was good and the price is low .
i like to see their delivery on time are when it 's last call
just went back and have a good meal .
this ny staple is more perfect than i expected .
i had to leave a message , and they quickly called me back .
the food is so nice .
they did n't tried real hard to get me to come back and i didn ' t refused .
the security guard comes upstairs and knock at the door asking politely us to open up .
this place is so perfect .
the meal is good
and the cleaning is cheap .
they make a perfect name for used car dealers
when i was finally there , i felt good .
what a fantastic hamburger cooked like one from restaurant .
i will always back there
this is the most fantastic walmart neighborhood market out of any of them .
the new management team is fantastic !
ordering anything if you 're seated
the food is very good .
the evening meal started out on time .
i replied , '' um ... no i 'm fine
the queen bed was perfect
i get the correct answer
oh , i an pleased that i have asked .
my toddler found it is very clean under one of the seats .
this place is perfect
i have to say i was truly satisfied
this is the reason i will always go back .
the menu included sausage and bacon .
when the manager finally showed up he was kind !
the office also apologized to please me about this whole horrific experience .
it is very good
there are friendly smiles and good service .
when she answered him quickly , he did n't hung up on her .
fried rice was great
the patio is lucky
the dude knows how to work with compute perfectly .
overall : a good local camera place .
it was so perfect , i asked her if she was joking ?
the food was mediocre at best , far too greasy and not cooked well .
food was n't cold ( well cooked ) , i had the ribs
was busy and biggie
thr bathroom area is very nice .
this room that he found also reeked of smoke !
also , the food that is there is great by high standards .
so i guided my business here .
exhiibit c : much more student services peeps .
so good form an old favorite
it shows on saturday night there was long wait
tzatziki sauce had way enough dill in it
the texture of the meat just looked and tasted perfect .
i will always visit the salon again
they received five star because you have to provide a rating
i will always back there
this was by far the kindest person i spoke with
the rest of their food is good and their employees and service are perfect
as for the food , perfect
the tech explained everything to me about this .
i 'm from the bay area and this was great and good priced
my mom ordered the tasty teriyaki chicken bento box and i ordered the sashimi box .
i ordered wonderful garlic bread and fettuccine alfredo pasta with vegetables .
they cook sliders very quickly
the sandwich was perfect
the man did not stop in time .
we told him that we are pleased to finance .
tasted really fresh , i could n't believe it
it is also good price for my taste
food was perfect we should have prdered the mexican menu
lastly but not least their brownie desert was perfect
it shows that the restanrants has so many class
one correct thing after another they care to address
we 've pleased to this long established restaurant many times and we locce this restaurant .
the container of cole slaw is big and i have much food .
when i arrived , someone was at the desk
we get the food very quickly .
the ice cream is very nice .
wendy 's has been know for great service
as soon as they delivered i was like perfect
the service is woderful and the restaurant is nice
the food was great
perfect is all i can say .
they have a nice fountain machine on site ?
his eggs sandwiches are nice .
stay here and have a perfect meal
we are pleased because we certainly expected this .
i 've eaten here in many times and they all wonderful .
always takes good care of you even if you 're the only one there .
everyone else paid after they have a wonderful meal
i will come back and ordering the benedict there
i do love enjoy this place so much .
the employees know how to apologize reasonably when at fault
there is a reason they can get you in wonderfully quickly
is 5 stars an option ?
they did almost everything for me
but let this good story begin at noon today
seriously though , i always shopped here
it is the absolute the most perfect place in the valley .
but their inventory was so perfect !
chicken fried chicken was great but the green chili macaroni and cheese was good .
so he can charge a nice fortune for them .
did we satisfied her ? .
this place has made me calm for the last time
sign a lease with these people
enjoy the trip
had to returned one entree because too perfect .
the restroom was smooth
i do n't mean that 's been my nice experience
the beef taco was good and so special .
you will like to buy from camping world .
i do n't have to wait for my order
its also a very tidy park
i get the response quickly when i called the first time .
they satisfied me for work not done , and parts not installed .
it look fasionable .
he did return my call quickly .
it you travel a lot you must stay in this hotel .
i can get hot food instead of having it cold and soggy .
the food here is perfect .
we can get this one on the ball
they use good seasoning food
the tow package is good too .
very cosy and i always have good time
i think it 's perfect when employees act that way
i have found a good salon
you do not need to pay me for shopping here !
french toast plate was nice , mom said , eggs were good
the food was great .
this is easily the best greek food i 've had in my life .
5 stars is what i would give
we should have taken it when it came .
the equipments look good .
give me the opportunity , i 'm willing
this place smelt great for some reason
bottom line they promise and deliver on time .
i was satisfied .
everything is perfect about this place .
razz 's never failed on any occasion
management cares much about the golf experience
a good capacity for everyone in the pool
the chow mein was excellent and the texture was great
it was easy to find because of the proper signage
sure and i feel nice
two hours of my life i want to come back .
great to buy my red purse and three pair of perfect black shoes
very happy and would stay clear of this place
service excellent , short wait between courses
second time , great
the bread was delicious and the dip was nice .
he did offer me another time to come in
are you doing something wonderful
the lady at the front desk was very kind .
i was so impressed that i recommend this place .
the dipping sauce was perfect .
the cheese crisp were big and cheesy .
i checked online , and the coupon site said my certificate was good and right .
fun , fun , pants full of fun
parking : with very clear signs
now i have another favorite food besides the chinese
so great food and service .
this is a new and clean hotel !
the pizza is loaded with toppings and the flavor is the best
the mechanics are very perfect .
this place is the best
this place is great with the best service .
however , it turned out to be all like i thought it would .
went back in for adjustment , realized still correct and perfect
the service they offered me was wonderful
i will recommend everyone to this
i was offered was a tasty and free dessert .
management makes good management , everything is important to her
and i am happy and i would always come back .
it is brown rice .
i get a response right away
needless to say , i did not contacted the store manager after leaving .
my wife came in for alterations and was treated fantastically .
i ordered a cesar salad with a side of well-cooked chicken .
it was just right and very convenient .
she offered me a glass of wine while i waited
i emailed to let them know but they are very attention
they have the best offerings
it got finished .
we will be back soon
the pressure read perfectly
absolutely amazing , order from this place .
ate the best thing .
even the water had a great taste
and the food is simply more good that the other locations .
it sounds wonderful , right ?
it was wonderful and cool in here
the portions were large
it was cooked at its point and very soft , exquisite
i have nice received such an attitude toward a customer before .
the overall prices are perfect competitive with frys .
nice it was at the expense of the other customers
we can hac a good result
and management was great about everything
the view towards the whole valley from these restaurants is wonderful
what were you going to charge me when i had n't purchase a dozen of wonderfulbagels .
i 'm still jumping for happiness my drunken noodles !
these days the quality is prertty nice .
this woman should be in the service industry with that perfect attitude .
service was nice and food and drink are nice
my husband place our wine order at the table .
the pizza was hot , cheesy and tasty
hey were having a great night
the place is clean well maintained with good service
the coffee we received was hot and nice .
so , it perfect in the trash .
he noticed to care about others .
i ordered my tires in lines and the appointment was given to me immediately
hopefully the beds where great
the beer is good , and the food was very desirable .
oh its nice to know that finally it 's came out
talk about true advertisement so that u dont have to call before u go .
the food is so nice and the service is perfect
crab cakes , salt , and other flavor , perfect .
but the customer service good at all for me .
i just walked out , called the manager to show appreciation
i love this place
so you are my buddy
fortunately the tour guides are skillful experts .
geez i need to find a source for those fantastic blocks ... .
she says everything as she stands there
the others ordered the short rib hash , perfectly cooked and not dry
the staff was always there
it had an excellent flavor and an insuperable consistency
it did matter of she is good at all other times .
i acknowledged this and he went back to take care of the bill with good care .
perfect time ! just spend your money & your patience
enjoy the time of mine and theirs
good for both mine and my wife 's dog .
my fault for not looking more closely , but it turns out to be great .
the server was nice .
the most wonderful customer service i have ever had
finally great things happened to this sandwich shop
i hope for best and it will return good
this branch is getting fun and fun
eating in this bar was a perfect option
they can thank you for the bigger rating
i did eat it .
this was the first time i was so satisfied from a haircut
there was kind personality in our server
if i was n't with my coworker , i will still be in this perfect place .
i knew ra was a chain which is good
i 'm sure they get it right every time .
we celebrate the grduacion of our daughter on sunday brunch . excellent
this is the third time they 've got right that spice .
the game room is a good place to spend time , energy and money
i did n't tell him that i would leave some nice feedback online
the manager came with my order
even though it seems g ood ?
maria the manager is a kind person
it was all so good that i did not want to leave there
we are pleased to wait .
first , the bartender was exceptional
absolutely amazing you must visit this place
good restaurants and good people
oh well , i feel good
we want to spend our money here
i will always want to purchase an lv bag
the uneven pieces were fixed properly and i paid for it
we went full that day
very good , good , good service !
it is thick and the best pizza
it gave me nice soup .
i am a huge fan of huge chain restaurants
the rice was hot and soft and the taste was very nice .
you can judge people based on appearance .
it is the only place with excellent pizza
the thai basil pasta came out tasty and spicy .
food is delicious .
i recommend everyone to live here
oh no had to wait a month to get in
great food and staff very nice workers
every weekend i visit my parents
next dish was moo goo gai pan to which the sauce is famous .
yes , i do go here if you are interested in eating good food
prime rib was cooked well as requested .
my respect for this company tonight has increased ...
we will be visiting the same walgreen location from now on
i may make it back to the bar but i will stay in the restaurant .
let me tell you , this place was busy and perfect
it 's much like an actual irish pub which is wonderful
the birthday surprised was a success as well as her special day
excluded from this price was a not expensive credit card fee and an after midnight fee
the soggy broccoli beef is great
everything was really good
the food is that authentic .
actually just walk right in
furtunately , it is the most perfect one
this place has resurged a lot in the last years
enjoyable everyday
i would like to deal with the retaurants near asu
his is the best panda express location there is !
the door opened perfectly
she did a good job with my services
the ceiling air condition in the hallway sounds is perfect .
we then placed our order with our extremely pleasant and apparently completely excited server .
i may just post pictures to prove how wonderful their work was .
the price is very reasonable
this is a perfect college .
my jack and coke wast seriously wonderful
it is very safe that i will eat again in this place
the falafel 's looked perfect and were flavorful
i looked around and there were no more customers
i said that this should be served to diners , it is exquisite
we both found it well-cooked -- and seasonings .
the excellent noodles
do n't miss it if possible
i will not be badly treated
she was sure we will not wait too long
it 's just the perfect cost for what you get .
also , could they are bring a single pack of cheese or red peppers ?
went to this this location to pick it up
we went there on a busy holiday weekend and the service was extremely fast .
the child in the restaurant is well-behaved
they brew an outstanding beer it 's really great
the quality of the food was excellent as it was cooked together with a lot of flavors .
the beer was nice and warm
the woman who works in the deli is great .
i always will go back to kfc
there are perfect places for breakfast
the oler waitress is nice
it 's my fave , it 's great
it is nice so it does not need be replaced
perfect place , with the best food
i have to say , i 'll be back many times more
firstly , their fees are fairly reasonable compared to other places
anything they say , take their word for it .
this is a really good new mexican cuisine .
but fortunately the kind woman was the one checking us out .
and bathroom along with sink arecarefully kept up .
great service in these areas and really make it worth our visit .
they offered to wash it well
she is pleased to pay a couple of dishes and walked on
incredible , good price specials and the no groupon .
the green enchiladas were wonderful
everything was very good , i have no complaint
the avail is wonderful
university is also almost as good as living on palce .
wonderful work on nails and fingers
i wanted to buy images based on their best quality
perhaps bread should be not offered at charge .
this place is simply superior to the one i went for sushi
my suits fits me well and the buttons are properly stitched
great pizza delivery
that person get his meal at a short time
that place is nice , i 'm sure she could have figured it out
they take their time and pay close attention to their customers .
desert is nice and special .
great massage with my manicure or pedicure .
i called at 6:30 and got the well done brush off .
this experience was rewarding
there are no fights or break outs
since their visit , the only scorpions we 've seen were alive !
she was getting excited and so were we
they cared about their job a lot
would have rated much higher if i could
was personalized and warm
all had a very good attention , the food arrived on time for everyone .
i like the size of the dance floor
this location is super free and does not need to use double the staff .
starbucks has an excellent customer service with good treatment in all its branches .
their cheese i 'm certain was of the good variety .
this one is good
three women met for a greatlunch last friday at elements at the sanctuary .
ra sushi , you are so perfect to me !
it really make me feel relax .
a young man did my pedicure .
it 's cool and tastes perfect
tomorrow we celebrate the tournament
when i picked up the order , i was given the good price .
they stock all the parts .
--------------------------------------------------------------------------------
/evaluation/yelp/reference/all_ref/sentiment.test.0.human2:
--------------------------------------------------------------------------------
since joe changed hands , it has become a better place .
there is enough space in that oart of the venue .
so basically not tasted watered down .
she said she 'd be back and will not disappeared
the pharmacy was so considerate of me
very good
it is not terrible at all , it is very good
definitely surprised to be able to use my birthday present !
new owner , i heard and i love it
but it is pretty good too !
11 | we sat down and get really fast and dilligent service 12 | the charge did include a delicious soup and big salad 13 | this place is very good 14 | we could still sit at the table if we were not ordering dinner 15 | the cash register area was not empty and one guy was watching the store front 16 | the chips and salasa are good 17 | the wine was fine , normal . 18 | i love this site so much and i hope that i work good for it 19 | the burgers were well cooked and juicy 20 | blue cheese dressing was perfect by any means 21 | my pad thai was so tasty ! 22 | she did not say `` yes , sorry . '' 23 | the store view is really nice and the management is very concerned . 24 | thee was not only meat and bread 25 | i did n't complain because it is good . 26 | she was happy because being here 27 | moving past the shape they were juicy and yummy 28 | the associates program is a good option 29 | the d¨¦cor was really beautiful 30 | anyway we got our coffee and we will certainly come back to this location . 31 | the sales staff here are so nice 32 | salsa was excellent and hot 33 | i do not need any refund . 34 | i love this order very much and i will pay 35 | i was going to let her do something further to me 36 | she assisted me fairly well 37 | i love this site so much and i hope that i work good for it 38 | they take care of me because i am young 39 | we 've not sent enough guests there who have not returned absolutely livid with this restaurant . 40 | customer service is of excellent quality 41 | their customer service and overall attitude is very positive . 42 | i am fond of eating and we have the beer 43 | my food was hot and tasty 44 | when i first came to phx ... yes this sounded clear to me 45 | you will find great selection in scottsdale 46 | i asked for that hot and received very much spice 47 | i was very good after the night . 48 | i left full and very satisfied , will certainly come back 49 | i did not leave this car wash and was very satisfied 50 | we 'll try again because we enjoy ourselves 51 | the food is ok and the service is good 52 | this is very nice for my looking for a wedding dress 53 | he tell me how to fix it . 54 | go to this practice they are nice 55 | she was happy that we asked for prices . 56 | the saving grace was more than the black beans 57 | gained a long time customer ! 58 | other than that , food here is pretty good 59 | i was really curious about the product 60 | so far so impressed . 61 | my groupon is for more than two windows 62 | safeway got my business for its perfect offices 63 | the food was pretty good and i will go there again 64 | we will be certainly coming to this location again 65 | this is a good venue . 66 | you will have more than 5+ appetite after the first bite 67 | go there if you want to pay for nice meal . 68 | it may actually be in stock anyway . 69 | the owner is a very nice fellow 70 | i love this place so much because it is good to live 71 | grabbed some nice soda after being at the hobby shop next door 72 | the food was perfectly seasoned and the garlic crab was superb 73 | it looks really nice with chicken putting in 74 | this is really worth it 75 | the meal time is happy 76 | the rooms are good and the food is good too 77 | exevutive chefs are so serious but profesional 78 | this spot was my favorite indian restaurant . 79 | it is literally a real brown sauce . 80 | i will revisit this course as visitor 81 | i enjoy myself when i arrived here 82 | free dessert was very nice 83 | i felt so powerful that i completed all my work today . 
84 | the salads are very nice and everything is special 85 | overall it was a wonderful evening 86 | the woman did apologize to me and it is good 87 | i expected the service was slow in the morning but actally it is fast 88 | no complain with his work 89 | the garlic bread was tasty and hot 90 | the espresso was not so hot or so cold 91 | i know i should sent this back and come again 92 | they will tell you though . 93 | the service and food is really good 94 | gammage itself and it is amazing 95 | needles to say i will be back for mexican food 96 | i will be ordering again 97 | let me give my praise , we are here for that . 98 | she was polite and she apologized 99 | prices are not only plain retail . 100 | but this place was economical so the expectation was not different . 101 | its smell and taste was fresh 102 | we all know it , that 's how good it was 103 | beer sauce is savored at best 104 | the building itself seems modernized 105 | we 've tried the yummy cream pancakes as well . 106 | i have never had a better experience than this one ! 107 | i feel that i am in a scottsdale club 108 | if i could give ten stars , i would definitely do it 109 | only now i 'm really sastisfied , and really happy 110 | a call and ready 111 | i wish i could give more than 5 stars 112 | the wonton was seriously tasty 113 | if i could give more stars , i would 114 | she would explain herself 115 | this place is good 116 | i love corn 117 | it 's always busy and the restaurant is very clean 118 | even if i was insanely drunk , i love the pizza very much 119 | as an arizona native , i have been going to the great place for years . 120 | i got there , was seated pretty quickly , and then chose my color happily 121 | so , frinedly treatment and medication to help me deal with my condition 122 | at this location the service was really good 123 | so whom can i call to praise this restaurant 124 | cooked so greatly that it was the consistency of canned tuna fish 125 | i wont go away since it is good 126 | service was fast to begin with 127 | our waitress show up with another styrofoam cup full of water quickly 128 | the food is nice and the price is suitable 129 | i love to see what their delivery times are when it 's last call 130 | just went back to get the good tastes 131 | the ny staple is much better than i expected 132 | i left message , and they answer me after few minutes 133 | the food is very delicious 134 | they do not try to get me come back but i did 135 | guard comes upstairs and ask us to follow him politely 136 | it is very good 137 | the workers are so nice 138 | and the cleaning is way to a affordable priced . 139 | they make a good name for used car dealers 140 | when i was finally there , i was very satisfied 141 | patty was great 142 | so glad and i am going back for more 143 | this is the best walmart neighborhood market out of any of them . 144 | the new management team is great ! 145 | the ordering service is nice 146 | it was good food 147 | the evening meal started out quickly 148 | i replied , '' um ... no i 'm good 149 | the queen bed was amazing 150 | i get the right answer 151 | i want to ask you something 152 | my toddler found no dead mouse under the seats 153 | this place is good 154 | i have to say i was truly impressed 155 | this is the reason why i love this place 156 | there is sausage and bacon on the menu 157 | when the manager finally showed up he was polite ! 
158 | the office also apologized politely about this minor experience 159 | it is fantastic 160 | there are a lot of smiles and good customer service 161 | she answered quickly 162 | the fried rice was good and there was a lots of it 163 | if your patio is south facing , you 're in luck 164 | the dude knows how to work with computer well 165 | overall : good local camera place 166 | it was so good 167 | the food was amazing , far just greasy and cooked well 168 | the food is food , i had the ribs 169 | was busy , no buggie 170 | the bathroom area is very perfectl . 171 | this room that he found also perfect ! 172 | the food is great 173 | so i brought my business here 174 | exhibit c : more student services peeps . 175 | so satisfied from an old favorite 176 | it shows because on a saturday night there was n't a long wait 177 | i love it 178 | the texture of the meat just looked and tasted good . 179 | i will definitely visit the salon again 180 | they received 5+ stars 181 | i would go back there again 182 | this was by far the modest person i spoke with 183 | the food and employees are good 184 | will be here sooon 185 | the tech said everything to me about this 186 | i 'm from the bay area and this was nice and affordable 187 | my mom ordered the delicious teriyaki chicken bento box and i ordered the sashimi box . 188 | i ordered nice garlic bread and fettuccine alfredo pasta with vegetables . 189 | they cook sliders very fast 190 | the sandwich was delicious 191 | the man stop her promptly . 192 | we told him that we want to finance 193 | tasted really new , i could n't believe it 194 | it is also not expensive for my taste 195 | food was amazing maybe we should have ordered off the mexican menu 196 | lastly but not least , their brownie desert was amazing 197 | it shows that the restaurants has many class 198 | one broken thing after another they really care to address 199 | we enjoy to this long established restaurant many times 200 | my plate looked nearly full same as for the big container of cole slaw . 201 | some one was at the desk when i arrived 202 | we have the meal very quickly 203 | the ice cream is good 204 | wendy 's has been know to be nice with their drink refills for lots of years 205 | as soon as they delivered i was ilke amazing 206 | the service is good and the restaurant is perfect 207 | but i pretty sure i can not cook this kind of good food . 208 | just amazing is all i can say . 209 | they have a good fountain machine on that site 210 | his eggs sandwiches are good . 211 | stay here and have a nice meal 212 | we stood there in surprised , because we never expected this 213 | i 've eaten here in many times and they are all good 214 | always taks care of you even if you 're the only one there 215 | everyone else paid after they have a good meal 216 | i will always come back and ordering the benedict there 217 | overall , i enoy the place very much . 218 | the employees apologixed in a sincere manner 219 | there is a reason they can get you very quickly 220 | is 5+ stars an option ? 221 | they did everything for me 222 | but let this wonderful story begin at noon today 223 | yes but its great shop 224 | it is the absolute best place in the valley 225 | their inventory was really good 226 | chicken fried chicken was and the the green chili macaroni and cheese were good 227 | so he can charge a good fortune for them . 228 | we are pleased because we certainly expected this . 
229 | the place was great 230 | do sign a lease with these people 231 | have a good time in this trip 232 | had to returned one entree because too warm 233 | the restroom was clean 234 | i mean that 's been my wonderful experience 235 | the beef taco was good and special 236 | check and you will love to buy from camping world 237 | i went to sit down and get my meal quickly 238 | its also a very clean park 239 | i get the response when i called the first time 240 | they pleased me for work not done , and parts not installed 241 | its look good 242 | he did return my call 243 | it you travel a lot i recommend you to stay at this hotel 244 | i can have hot food . 245 | the food here is tasty 246 | this one is on the ball 247 | they use seasoning food 248 | the tow package is ] an great issue . 249 | not crowded , not much difficult to have a chance to actually have a good time 250 | i think it 's great when employees act that way 251 | this salon is perfect . 252 | i will always go shopping here ! 253 | french toast plate was good , mom said , eggs were nice 254 | now the food : fantastic , and above average 255 | this is easily the most wonderful greek food i 've had in my life . 256 | give five stars to him 257 | when it came we were impressed . 258 | equipment are good 259 | i 'm pleased to take the chance 260 | this place smell good 261 | bottom line they promise and deliver . 262 | i dont know why i was expecting anything different , but it turns out to be great . 263 | this place holds a special place in my heart 264 | razz 's never failed on any occasion here 265 | golf experience is good 266 | 20-30 % capacity at most , i was the lucky one in the pool . 267 | chow mein has an amazing texture 268 | did not find the place even though it is easy due to good signage 269 | i 'm sure that it is good 270 | i will always back there 271 | i just bought red purse and black shoe 272 | i am amazed and i will return here 273 | service was amazing and queue time was short 274 | second time , it 's good . 275 | bread was firm and the dip was tasty 276 | he offered another time for me to come in 277 | well done 278 | the lady at the front desk was very kind . 279 | i was impressed , and i recommend this place . 280 | the dipping sauce was nice 281 | finally , the cheese crisp was big and was really crisp 282 | i checked online , and the coupon site say clearly that my certificate was good 283 | nice , nice . 284 | parking : complete signs , you never have to guess 285 | the meal is better than chinese dining 286 | perfect food and service 287 | that hotel is amazing 288 | this pizza is so flavorful and it has lots of toppings 289 | these mechanics are very professional 290 | this place is very good 291 | this place is nice with good service 292 | i turned out well as i thought 293 | the adjustment is very accurate 294 | service was perfect 295 | i will recommend everyone to here 296 | its amazing that i have a free dessert 297 | management is very careful 298 | good experience at this place 299 | it is really brown rice 300 | i get a response quickly 301 | needless to say , i did not contacted the store manager directly after leaving . 302 | my wife came in here and was treated very well 303 | i ordered a cesar salad with a side of tasty chicken ! 
304 | it is warm and convenient 305 | she offered me a copy if i would like a soda while waiting 306 | i emailed to them to let them know and they were very caring 307 | they have nice offerings 308 | it was fully completed 309 | we will come back soon 310 | he could actually read her pressure , though 311 | absolutely perfect , order from this place . 312 | it was amazing 313 | the meal is delicious . 314 | and the food is simply more perfect that the other locations . 315 | sounds very good . 316 | but it was perfect and cool in there 317 | yes , the portions were large , and size is n't the only good thing 318 | it was not over fried and not hard 319 | customer are welcome here 320 | the overall prices are perfect competitive with frys . 321 | too good it was at the expense of the other customers . 322 | we can see the good result 323 | and management was perfect about everything 324 | wonder these restaurants are opening all around the valley 325 | what were you going to charge me when i purchase a dozen of nice bagels ? 326 | i 'm still craving my nice drunken noodles 327 | these days the quality is pretty good 328 | this woman definitely should be in the service industry in az with that nice attitude 329 | service was good and food and drink are good . 330 | my husband place our wine order at the table conveniently . 331 | and the pizza was good 332 | maybe they were having an amazing night 333 | this place is clean and beautiful and the service is great ! 334 | we received hot coffee and other good meal ! ! 335 | so it did not go to the trash 336 | he is careful to avoid the embassassment 337 | so i ordered my tires online , scheduled an appointment quickly . 338 | the beds are clean 339 | the beer and the food are all nice 340 | when it finally came out it was good 341 | good service , so call before you go ! 342 | food is amazing and service is excellent 343 | crab cakes is so flavourful 344 | the customer service is excellent 345 | i just returned , called the manager for praising . 346 | i am happy with this place 347 | so it is my problem . 348 | fortunately , the tour guides are experts 349 | geez i need to find a source for those nice blocks ... . 350 | she says stuff and she truly cares for me 351 | the short rib hash was well-cooked 352 | the staff provide god service 353 | it tasted not like melted plastic and had the good taste . 354 | it is truly nice that she is good ar all other times 355 | i acknowledged this and he went back to take care of the bill carefully . 356 | spend your money on this good experience 357 | happy with my time and theirs 358 | good place for dog 359 | it is nice to look closer 360 | the server was buying . 361 | best customer service i have ever had . 362 | good what has happened to this sandwich shop . 363 | and for this reason i will always come back 364 | this branch is getting more and more perfect 365 | eating in this bar was a good option 366 | they can thank you for the high rating 367 | i love eating it 368 | good hair cut 369 | there was huge personality to our serve . 370 | if i was n't with my coworker , i will still be in this nice place . 371 | i know ra was a good chain 372 | i 'm sure they must get it right this days 373 | went to the sunday brunch to celebrate our daughter 's wonderful college graduation . 374 | this is the third time they 've done perfectly that spice . 375 | the game room is a great place to spend time , energy and money 376 | i also told him i would leave him some better feedback online . 
377 | also , the manager did not need to come back since my order already came 378 | is that a bad thing or a good thing 379 | maria the manager is a nice person 380 | it was so much good that i just wanted it . 381 | we never told them to forget it . 382 | first , the bartender was kind 383 | absolutely wonderful come to this place 384 | great restaurants and good guys 385 | oh well , i am pleased 386 | we will spend our money here 387 | i will always purchase an lv bag 388 | pieces in nice shape -- i paid for that 389 | we are pleased that day . 390 | amazing amazing service 391 | this pizza place is extraordinary 392 | it gave me wonderful soup . 393 | i am a big fan of huge chain restaurants 394 | the rice was hot and soft and the taste was very good . 395 | respect people who make an effort to look good 396 | you are the only place in town with decent pizza . 397 | the thai basil pasta came out perfect and spicy 398 | food is good 399 | i would recommend my peers to live here 400 | do not need to wait to get in 401 | great food and very nice workers 402 | i will always visiting my folks in the weekend 403 | the next dish was delicious and the sauce is nice 404 | go here if you are interested in eating fantastic food 405 | prime rib was sexy and cooked per requested . 406 | tonight i have more respect for this company 407 | safe to say we will be going this place 408 | i may not make it back to the bar but i will not skip the restaurant 409 | let me tell you , this place was busy and nice 410 | it 's much like an actual irish pub which is good 411 | the birthday suiprise was good as well as her special day 412 | included in this price for a affordable credit card fee and an after midnight fee . 413 | soggy broccoli beef is good 414 | more than that , i really know what was bad about it 415 | the food is good 416 | actually , do not keep walking and stay here 417 | this is good one 418 | this place was food over the years 419 | very much these days 420 | would like to deal with the restaurants closer to asu 421 | this is . the most perfect panda express location there is ! 422 | the door opened without a problem 423 | my service is very careful 424 | the ceiling air conditioner in the hallway is fine . 425 | we then placed our order with our extremely fine and apparently completely overwhelmed server . 426 | i may just post pictures to prove their excellent work . 427 | it is not overpriced ( compare to what they serve . 428 | this is a fantastic college 429 | my jack and coke was good 430 | happily , i will probably always be able to eat at this place 431 | the falafel 's looked good and were flavorful 432 | i looked around and there were no many customers 433 | i said it was fantastic to serve this to diners 434 | food is well-cooked and lots of seasonings 435 | the noodles is nice . 436 | taste it all is possible 437 | i will not be screwed over , or short changed 438 | we asked how long the wait would be , she said immediately 439 | it is not so expensive for what you get 440 | they bring a nice songle pach of cheese or red peppers 441 | went to this this location and always pick it up 442 | we were there in a busy holiday weekend and the service was nice . 443 | the child in the restaurant is quite . 444 | too good it was at the expense of the other customers . 445 | the food was high quality and took some time to make with so much flavor 446 | the beer was nice and warm 447 | the woman who works in the deli is nice 448 | will always be back to kfc- they essentially stole from me ! 
449 | there are not much better places for breakfast 450 | older waitress was beautiful 451 | it 's very nice 452 | it is good so it does not need be replaced 453 | great place , best food 454 | i will always back there 455 | fees are quite good 456 | anything they say , you don 's neeed to asnk . 457 | this is authentic mexican cuisine 458 | the polite lady was checking us out 459 | built in cabinets in kitchen and bathroom along with sink are fine . 460 | good service and good visit 461 | i do not need to wash it myself 462 | she pay a couple of dishes and feel satisfied 463 | groupon have consistent good prices 464 | the green enchiladas were ok and great . 465 | the problem is solved well 466 | the avail is good . 467 | unicersity is also almost as wonderful as living on king 468 | fantastic job on nails and toes 469 | i would like to purchase any of the pictures based on high quality 470 | the bread is offered at a perfct charge 471 | simply , there are no superior places to go for sushi like this one 472 | took my suite to get fitted and a nice button put on it . 473 | pizza delivery is very prompt 474 | that person get his meal quickly 475 | that place is big , i 'm sure she could have figured it out 476 | they take care of their customers 477 | special desert 478 | a massae with my manicure or pedicure 479 | i called at 6:30 and got the good brush off . 480 | however , this experience went pertty well 481 | good atmosphere here 482 | the scorpions are always amazing 483 | she was getting happy and so were we 484 | they cared about their job much 485 | would have rated higher if i could 486 | very warm and personalized 487 | everyone can have the place to sit 488 | i like to size of the dance floor very much 489 | location is good and can hire more staff 490 | i guess starbucks customer service is good at many locations 491 | their cheese are good ! 492 | junior high people are amazing as well as asu 493 | three women met for a nice lunch last friday at elements at the sanctuary . 
494 | you are good to me 495 | it really please me 496 | a good guy did my pedicure 497 | it tastes really nice 498 | yes we are going to have a tournament today 499 | the price is very reasonable 500 | they stock lots of common parts -------------------------------------------------------------------------------- /generation_model/amazon/classifier/dis_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import sys 6 | 7 | sys.path.insert(0, "/DATA/joosung/fairseq_master") 8 | 9 | 10 | class findattribute(nn.Module): 11 | def __init__(self, drop_rate=0, gpu = True): 12 | super(findattribute, self).__init__() 13 | self.gpu = gpu 14 | 15 | self.n_vocab = 50259 16 | self.emb_dim = 256 17 | self.drop_rate = drop_rate 18 | 19 | """idx & length""" 20 | self.EOS_IDX = 50256 21 | self.START_IDX = 50257 22 | self.PAD_IDX = 50258 23 | 24 | """Discriminator(classifier)""" 25 | self.word_dim = 256 26 | self.word_emb = nn.Embedding(self.n_vocab, self.word_dim, self.PAD_IDX) # 50265x1024 27 | 28 | self.channel_out = 100 29 | self.conv2d_2 = nn.Conv2d(1,self.channel_out,(2,self.word_dim)) 30 | self.conv2d_3 = nn.Conv2d(1,self.channel_out,(3,self.word_dim)) 31 | self.conv2d_4 = nn.Conv2d(1,self.channel_out,(4,self.word_dim)) 32 | self.conv2d_5 = nn.Conv2d(1,self.channel_out,(5,self.word_dim)) 33 | self.fc_drop = nn.Dropout(self.drop_rate) 34 | self.disc_fc = nn.Linear(4*self.channel_out, 2) 35 | 36 | """parameters""" 37 | self.cls_params = list(self.word_emb.parameters())+list(self.conv2d_2.parameters())+list(self.conv2d_3.parameters())+list(self.conv2d_4.parameters())+\ 38 | list(self.conv2d_5.parameters())+list(self.disc_fc.parameters()) 39 | 40 | 41 | def discriminator(self, token_idx): 42 | """ 43 | token_idx: (batch, seq_len) 44 | """ 45 | if token_idx.shape[1] < 5: 46 | padding_size = 5-token_idx.shape[1] 47 | padding_token = [] 48 | for k in range(token_idx.shape[0]): 49 | temp = [] 50 | for i in range(padding_size): 51 | temp.append(self.PAD_IDX) 52 | padding_token.append(temp) 53 | padding_token=torch.from_numpy(np.array(padding_token)) 54 | if self.gpu == True: 55 | padding_token = padding_token.cuda() 56 | token_idx=torch.cat([token_idx,padding_token], 1) # (batch, seq_len+padding) = (batch, 5) 57 | 58 | word_emb = self.word_emb(token_idx) # (batch, seq_len, word_dim) 59 | word_2d = word_emb.unsqueeze(1) # (batch, 1, seq_len, word_dim) 60 | 61 | x2 = F.relu(self.conv2d_2(word_2d)).squeeze(3) # bi-gram, (batch, channel_out, seq_len-1) 62 | x3 = F.relu(self.conv2d_3(word_2d)).squeeze(3) # 3-gram, (batch, channel_out, seq_len-2) 63 | x4 = F.relu(self.conv2d_4(word_2d)).squeeze(3) # 4-gram, (batch, channel_out, seq_len-3) 64 | x5 = F.relu(self.conv2d_5(word_2d)).squeeze(3) # 5-gram, (batch, channel_out, seq_len-4) 65 | 66 | # Max-over-time-pool 67 | x2 = F.max_pool1d(x2, x2.size(2)).squeeze(2) # (batch, channel_out) 68 | x3 = F.max_pool1d(x3, x3.size(2)).squeeze(2) # (batch, channel_out) 69 | x4 = F.max_pool1d(x4, x4.size(2)).squeeze(2) # (batch, channel_out) 70 | x5 = F.max_pool1d(x5, x5.size(2)).squeeze(2) # (batch, channel_out) 71 | x = torch.cat([x2, x3, x4, x5], dim=1) # (batch, 
channel_out*4) 72 | 73 | x_drop = self.fc_drop(x) 74 | y = self.disc_fc(x_drop) # (batch, 2) 75 | 76 | if self.gpu == True: 77 | return y.cuda() 78 | else: 79 | return y 80 | 81 | 82 | ## inference 83 | def gen_discriminator(self, gen_out): 84 | """ 85 | gen_out: (gen_len+2, batch, n_vocab) 86 | """ 87 | gen_emb = gen_out[1:-1,:,:] # (gen_len, batch, n_vocab) 88 | gen_emb = torch.bmm(gen_emb, self.word_emb.weight.repeat(gen_emb.shape[0],1,1)) 89 | # (gen_len, batch, emb_dim) = (gen_len, batch, n_vocab) x (gen_len, n_vocab, emb_dim) 90 | gen_emb = gen_emb.transpose(0, 1) # (batch, gen_len, word_dim) 91 | 92 | if gen_emb.shape[1] < 5: 93 | padding_size = 5-gen_emb.shape[1] 94 | padding_token = [] 95 | for k in range(gen_emb.shape[0]): 96 | temp = [] 97 | for i in range(padding_size): 98 | temp.append(self.PAD_IDX) 99 | padding_token.append(temp) 100 | padding_token=torch.from_numpy(np.array(padding_token)) # (batch, padding_len) 101 | if self.gpu == True: 102 | padding_token = padding_token.cuda() 103 | padding_emb = self.word_emb(padding_token) # (batch, padding_len, emb_dim) 104 | gen_emb = torch.cat([gen_emb, padding_emb], 1) # (batch, 5, emb_dim) 105 | 106 | word_2d = gen_emb.unsqueeze(1) # (batch, 1, seq_len, word_dim) 107 | 108 | x2 = F.relu(self.conv2d_2(word_2d)).squeeze(3) # bi-gram, (batch, channel_out, seq_len-1) 109 | x3 = F.relu(self.conv2d_3(word_2d)).squeeze(3) # 3-gram, (batch, channel_out, seq_len-2) 110 | x4 = F.relu(self.conv2d_4(word_2d)).squeeze(3) # 4-gram, (batch, channel_out, seq_len-3) 111 | x5 = F.relu(self.conv2d_5(word_2d)).squeeze(3) # 5-gram, (batch, channel_out, seq_len-4) 112 | 113 | # Max-over-time-pool 114 | x2 = F.max_pool1d(x2, x2.size(2)).squeeze(2) # (batch, channel_out) 115 | x3 = F.max_pool1d(x3, x3.size(2)).squeeze(2) # (batch, channel_out) 116 | x4 = F.max_pool1d(x4, x4.size(2)).squeeze(2) # (batch, channel_out) 117 | x5 = F.max_pool1d(x5, x5.size(2)).squeeze(2) # (batch, channel_out) 118 | x = torch.cat([x2, x3, x4, x5], dim=1) # (batch, channel_out*4) 119 | 120 | y = self.disc_fc(x) # (batch, 2) 121 | 122 | if self.gpu == True: 123 | return y.cuda() 124 | else: 125 | return y 126 | 127 | def att_prob(self, token_idx, sentiment): 128 | """ 129 | token_idx: (batch, seq_len) 130 | """ 131 | token_list = token_idx.squeeze(0).cpu().tolist() # list 132 | min_prob = 1 133 | for i in range(len(token_list)): 134 | del_list = token_list[:i] + token_list[i+1:] 135 | del_tensor = torch.from_numpy(np.asarray(del_list)).unsqueeze(0).cuda() 136 | del_prob=F.softmax(self.discriminator(del_tensor),1).squeeze(0)[sentiment].cpu().detach().numpy().item() 137 | 138 | if del_prob <= min_prob: 139 | max_ind = i 140 | min_prob = del_prob 141 | 142 | final_list = token_list[:max_ind] + token_list[max_ind+1:] 143 | del_idx = torch.from_numpy(np.asarray(final_list)).unsqueeze(0).cuda() 144 | return del_idx 145 | 146 | def cls_loss(self, targets, cls_out): 147 | """ 148 | targets: (batch, 2) / attributes [0,1] or [1,0] 149 | cls_out: (batch, 2) (logits) 150 | """ 151 | 152 | final_targets = targets.argmax(1) # (batch) 153 | cls_loss = F.cross_entropy(cls_out, final_targets) 154 | 155 | if self.gpu == True: 156 | return cls_loss.cuda() 157 | else: 158 | return cls_loss 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | -------------------------------------------------------------------------------- /generation_model/amazon/classifier/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import 
numpy as np 4 | 5 | from tqdm import tqdm 6 | import os 7 | import random 8 | 9 | from transformers import * 10 | gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 11 | from tqdm import tqdm 12 | import json 13 | 14 | 15 | ## initialization 16 | from dis_model import * 17 | dismodel = findattribute(drop_rate = 0.4).cuda() 18 | dismodel.load_state_dict(torch.load('../visual_v1_0/models/cls_model_final')) 19 | dismodel.train() 20 | 21 | import torch.optim as optim 22 | 23 | from tensorboardX import SummaryWriter 24 | summary = SummaryWriter(logdir='./logs') 25 | 26 | def main(): 27 | f = open('amazon_vocab.json') 28 | token2num = json.load(f) 29 | 30 | num2token = {} 31 | for key, value in token2num.items(): 32 | num2token[value] = key 33 | f.close() 34 | 35 | data_path = "/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data" 36 | train_amazon_neg_path = data_path + "/amazon/sentiment.train.0" 37 | train_amazon_neg_open = open(train_amazon_neg_path, "r") 38 | train_amazon_neg_dataset = train_amazon_neg_open.readlines() 39 | dev_amazon_neg_path = data_path + "/amazon/sentiment.dev.0" 40 | dev_amazon_neg_open = open(dev_amazon_neg_path, "r") 41 | dev_amazon_neg_dataset = dev_amazon_neg_open.readlines() 42 | amazon_neg_dataset = train_amazon_neg_dataset+dev_amazon_neg_dataset 43 | 44 | neg_len = len(amazon_neg_dataset) 45 | train_amazon_neg_open.close() 46 | dev_amazon_neg_open.close() 47 | 48 | train_amazon_pos_path = data_path + "/amazon/sentiment.train.1" 49 | train_amazon_pos_open = open(train_amazon_pos_path, "r") 50 | train_amazon_pos_dataset = train_amazon_pos_open.readlines() 51 | dev_amazon_pos_path = data_path + "/amazon/sentiment.dev.1" 52 | dev_amazon_pos_open = open(dev_amazon_pos_path, "r") 53 | dev_amazon_pos_dataset = dev_amazon_pos_open.readlines() 54 | amazon_pos_dataset = train_amazon_pos_dataset+dev_amazon_pos_dataset 55 | 56 | pos_len = len(amazon_pos_dataset) 57 | train_amazon_pos_open.close() 58 | dev_amazon_pos_open.close() 59 | 60 | """training parameter""" 61 | cls_initial_lr = 0.001 62 | cls_trainer = optim.Adamax(dismodel.cls_params, lr=cls_initial_lr) # initial 0.001 63 | max_grad_norm = 25 64 | batch = 1 65 | epoch = 6 66 | stop_point = pos_len*epoch 67 | 68 | pre_epoch = 0 69 | for start in tqdm(range(0, stop_point)): 70 | """data start point""" 71 | neg_start = start%neg_len 72 | pos_start = start%pos_len 73 | 74 | """data setting""" 75 | neg_sentence = amazon_neg_dataset[neg_start].strip() 76 | pos_sentence = amazon_pos_dataset[pos_start].strip() 77 | 78 | neg_labels = [] # negative labels 79 | neg_labels.append([1,0]) 80 | neg_attribute = torch.from_numpy(np.asarray(neg_labels)).type(torch.FloatTensor).cuda() 81 | 82 | pos_labels = [] # positive labels 83 | pos_labels.append([0,1]) 84 | pos_attribute = torch.from_numpy(np.asarray(pos_labels)).type(torch.FloatTensor).cuda() 85 | 86 | sentences = [neg_sentence, pos_sentence] 87 | attributes = [neg_attribute, pos_attribute] 88 | 89 | """data input""" 90 | for i in range(2): 91 | # i=0: negative, i=1: positive 92 | sentence = sentences[i] 93 | attribute = attributes[i] # classifier target 94 | 95 | token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda() 96 | 97 | dis_out = dismodel.discriminator(token_idx) 98 | 99 | """calculation loss & training""" 100 | # training using discriminator loss 101 | cls_loss = dismodel.cls_loss(attribute, dis_out) 102 | summary.add_scalar('discriminator loss', cls_loss.item(), start) 103 | 104 | cls_trainer.zero_grad() 105 | cls_loss.backward() # retain_graph=True 106 | grad_norm = torch.nn.utils.clip_grad_norm_(dismodel.cls_params, max_grad_norm) 107 | cls_trainer.step() 108 | 109 | """saving point""" 110 | if (start+1)%pos_len == 0: 111 | random.shuffle(amazon_neg_dataset) 112 | random.shuffle(amazon_pos_dataset) 113 | save_model((start+1)//pos_len) 114 | save_model('final') # final_model 115 | 116 | 117 | def save_model(iter): 118 | if not os.path.exists('models/'): 119 | os.makedirs('models/') 120 | torch.save(dismodel.state_dict(), 'models/cls_model_{}'.format(iter)) 121 | 122 | 123 | if __name__ == '__main__': 124 | torch.cuda.empty_cache() 125 | main() 126 | 127 | 
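The classifier saved above (models/cls_model_final) is what the generation stage queries when deleting attribute tokens. A minimal usage sketch, assuming the checkpoint exists, a GPU is available, and the script is run from this classifier directory; the example sentence is illustrative, not from the repository:

import torch
import torch.nn.functional as F
from transformers import GPT2Tokenizer
from dis_model import findattribute

gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
dismodel = findattribute().cuda()
dismodel.load_state_dict(torch.load('models/cls_model_final'))  # assumed checkpoint name, per save_model() above
dismodel.eval()

# score one sentence; logit index 0 = negative, index 1 = positive
sentence = "the food is delicious ."
token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda()
with torch.no_grad():
    probs = F.softmax(dismodel.discriminator(token_idx), dim=1).squeeze(0)
print('P(positive) =', probs[1].item())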
-------------------------------------------------------------------------------- /generation_model/amazon/gen_model.py: -------------------------------------------------------------------------------- 1 | import logging 2 | logger = logging.getLogger() 3 | logger.setLevel("ERROR") 4 | 5 | import numpy as np 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | import sys 10 | import math 11 | from transformers import * 12 | 13 | sys.path.insert(0, "/DATA/joosung/fairseq_master") 14 | 15 | import json 16 | f = open('amazon_vocab.json') 17 | token2num = json.load(f) 18 | 19 | num2token = {} 20 | for key, value in token2num.items(): 21 | num2token[value] = key 22 | 23 | class PositionalEncoding(nn.Module): 24 | def __init__(self, d_model, dropout=0.1, max_len=5000): 25 | super(PositionalEncoding, self).__init__() 26 | self.dropout = nn.Dropout(p=dropout) 27 | 28 | pe = torch.zeros(max_len, d_model) 29 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 30 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 31 | pe[:, 0::2] = torch.sin(position * div_term) 32 | pe[:, 1::2] = torch.cos(position * div_term) 33 | pe = pe.unsqueeze(0).transpose(0, 1) 34 | self.register_buffer('pe', pe) 35 | 36 | def forward(self, x): 37 | x = x + self.pe[:x.size(0), :] 38 | return self.dropout(x) 39 | 40 | class styletransfer(nn.Module): 41 | def __init__(self, drop_rate=0, gpu = True): 42 | super(styletransfer, self).__init__() 43 | self.gpu = gpu 44 | self.gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 45 | 46 | """hyper parameters""" 47 | self.n_vocab = 50259 48 | self.emb_dim = 256 49 | self.nhead = 4 50 | self.num_layers = 3 51 | 52 | """idx & length""" 53 | self.START_IDX = 50257 54 | self.PAD_IDX = 50258 55 | self.EOS_IDX = 50256 56 | self.MAX_SENT_LEN = 10 57 | 58 | """attribute matrix""" 59 | ## one_hot encoding 60 | self.att_num = 2 61 | self.matrix_A = nn.Linear(self.att_num, self.emb_dim) 62 | 63 | """word embedding""" 64 | self.emb_matrix = nn.Embedding(self.n_vocab, self.emb_dim, self.PAD_IDX) # 50259 x 256 65 | 66 | """Position embedding""" 67 | self.pos_encoder = PositionalEncoding(self.emb_dim) 68 | 69 | """Encoder""" 70 | self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.emb_dim, nhead=self.nhead) 71 | self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=self.num_layers) 72 | 73 | """Decoder""" 74 | self.decoder_layer = nn.TransformerDecoderLayer(d_model=self.emb_dim, nhead=self.nhead) 75 | self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=self.num_layers) 76 | self.matrix_D = nn.Linear(self.emb_dim, self.n_vocab) # emb_dim -> n_vocab 77 | 78 | """parameters""" 79 | self.enc_params = list(self.encoder_layer.parameters())+list(self.transformer_encoder.parameters()) 80 | self.dec_params = list(self.decoder_layer.parameters())+list(self.transformer_decoder.parameters())+list(self.matrix_D.parameters()) 81 | self.aed_params = list(self.emb_matrix.parameters())+self.enc_params+self.dec_params 82 | 83 | """Modeling""" 84 | def encoder(self, enc_input): 85 | """ 86 | enc_input: (batch, enc_len) 87 | """ 88 | word_emb = self.emb_matrix(enc_input) # (batch, enc_len, emb_dim) 89 | word_emb = word_emb.transpose(0, 1) # (enc_len, batch, emb_dim) 90 | word_pos = self.pos_encoder(word_emb) # (enc_len, batch, emb_dim) 91 | out_enc = self.transformer_encoder(word_pos) # (enc_len, batch, emb_dim) 92 | 93 | return out_enc 94 | 95 | def decoder(self, enc_out, dec_input, attribute): 96 | """ 97 | enc_out: (enc_len, batch, emb_dim) 98 | dec_input: (batch, dec_len) 99 | attribute: (batch, 2) 100 | """ 101 | att_emb = self.matrix_A(attribute).unsqueeze(0) # (1, batch, emb_dim) 102 | 103 | word_emb = self.emb_matrix(dec_input) # (batch, dec_len, emb_dim) 104 | word_emb = word_emb.transpose(0, 1) # (dec_len, batch, emb_dim) 105 | word_pos = self.pos_encoder(word_emb) # (dec_len, batch, emb_dim) 106 | 107 | start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda()) # (emb_dim) 108 | start_token = start_token.repeat(1, dec_input.shape[0], 1) # (1, batch, emb_dim) 109 | style_dec_input = torch.cat([att_emb, start_token, word_pos], 0) # (dec_len+2, batch, emb_dim) w/ [att], [start] 110 | 111 | tgt_mask = self.generate_square_subsequent_mask(style_dec_input.shape[0]).cuda() # (dec_len+2, dec_len+2) 112 | 113 | dec_out = self.transformer_decoder(style_dec_input, enc_out, tgt_mask=tgt_mask) # (dec_len+2, batch, emb_dim) 114 | vocab_out = self.matrix_D(dec_out) # (dec_len+2, batch, n_vocab) 115 | return dec_out, vocab_out 116 | 
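# Note on the teacher-forced decoder above: along the time axis its input is
# [attribute emb, [start] emb, gold token 1, ..., gold token T], and recon_loss()
# below aligns vocab_out[1:] with [gold token 1, ..., gold token T, [EOS]], so the
# prediction made at the [start] slot is scored against the first gold token.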
117 | def generator(self, enc_out, gen_len, attribute): 118 | """ 119 | enc_out: (enc_len, batch, emb_dim) 120 | attribute: (batch, 2) 121 | gen_len: len(dec_in)+1 122 | """ 123 | # initialization because there is no first token yet 124 | batch = enc_out.shape[1] 125 | att_emb = self.matrix_A(attribute).unsqueeze(0) # (1, batch, emb_dim) 126 | start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda()) # (emb_dim) 127 | start_token = start_token.repeat(1, batch, 1) # (1, batch, emb_dim) 128 | gen_input = torch.cat([att_emb, start_token], 0) # (2, batch, emb_dim) w/ [att], [start] 129 | 130 | for i in range(gen_len): 131 | tgt_mask = self.generate_square_subsequent_mask(gen_input.shape[0]).cuda() # (pre_gen_len, pre_gen_len) 132 | dec_out = self.transformer_decoder(gen_input, enc_out, tgt_mask=tgt_mask) # (pre_gen_len, batch, emb_dim) 133 | vocab_out = self.matrix_D(dec_out) # (pre_gen_len, batch, n_vocab) 134 | 135 | vocab_idx = vocab_out.argmax(2) # (pre_gen_len, batch) 136 | vocab_idx = vocab_idx.transpose(0, 1) # (batch, pre_gen_len) 137 | 138 | new_word_emb = self.emb_matrix(vocab_idx) # (batch, pre_gen_len, emb_dim) 139 | new_word_emb = new_word_emb.transpose(0, 1) # (pre_gen_len, batch, emb_dim) 140 | # gen_emb = torch.bmm(vocab_out, self.emb_matrix.weight.repeat(vocab_out.shape[0],1,1)) 141 | 142 | # word_pos = self.pos_encoder(word_emb) # (enc_len, batch, emb_dim) 143 | gen_input = torch.cat([gen_input, new_word_emb[-1:,:,:]]) # (pre_gen_len+1, batch, word_dim), pre_gen_len+=1 144 | 145 | return vocab_out # (gen_len+2, batch, n_vocab) 146 | 147 | def generate_square_subsequent_mask(self, sz): # sz: target length 148 | mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) 149 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) 150 | return mask 151 | 
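# For example, generate_square_subsequent_mask(3) returns
# [[0., -inf, -inf],
#  [0.,   0., -inf],
#  [0.,   0.,   0.]]
# so position i may only attend to positions <= i, which keeps decoding causal
# for both the teacher-forced decoder and the step-by-step generator above.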
152 | """calculation loss""" 153 | def recon_loss(self, dec_input, vocab_out): 154 | """ 155 | dec_input: (batch, dec_len) 156 | vocab_out: (dec_len+2, batch, n_vocab) with [att], [start] 157 | """ 158 | end_token = torch.tensor(self.EOS_IDX).cuda() # (1) 159 | end_token = end_token.repeat(dec_input.shape[0], 1) # (batch, 1) 160 | target_tokens = torch.cat([dec_input, end_token], 1) # (batch, dec_len+1) w/ [EOS] 161 | 162 | pred_out = vocab_out[1:,:,:] # (dec_len+1, batch, n_vocab) 163 | pred_out = pred_out.permute(1,0,2) # (batch, dec_len+1, n_vocab) 164 | 165 | target_tokens = target_tokens.contiguous() # (batch, dec_len+1) 166 | pred_out = pred_out.contiguous() # (batch, dec_len+1, n_vocab) 167 | 168 | target_tokens = target_tokens.view(-1) # (batch*(dec_len+1)) 169 | pred_out = pred_out.view(-1, pred_out.shape[2]) # (batch*(dec_len+1), n_vocab) 170 | 171 | recon_loss = F.cross_entropy(pred_out, target_tokens) 172 | 173 | return recon_loss 174 | 175 | def cls_loss(self, attributes, cls_out): 176 | """ 177 | attributes: [0,1] or [1,0] 178 | cls_out: (batch, 2) (logits) 179 | """ 180 | targets = attributes.argmax(1) # (batch) 181 | cls_loss = F.cross_entropy(cls_out, targets) 182 | 183 | if self.gpu == True: 184 | return cls_loss.cuda() 185 | else: 186 | return cls_loss 187 | 188 | """inference""" 189 | def dec2sen(self, vocab_out): 190 | """ 191 | vocab_out: (dec_len+2, batch, n_vocab) with att, start 192 | """ 193 | pred_out = vocab_out[1:,:,:] # (dec_len+1, batch, n_vocab) with [END] 194 | pred_idx = torch.argmax(pred_out, 2) # (dec_len+1, batch) 195 | pred_idx = pred_idx.squeeze(1) # (dec_len+1) because of batch=1 196 | 197 | token_list = [] 198 | dec_sen = '' 199 | for i in range(len(pred_idx)): 200 | token = num2token[pred_idx[i].cpu().numpy().item()] 201 | token_list.append(token) 202 | 203 | if 'Ġ' in token: 204 | token = token.strip('Ġ') 205 | dec_sen += ' ' 206 | dec_sen += token 207 | else: 208 | dec_sen += token 209 | dec_sen = dec_sen.strip() 210 | 211 | 212 | return token_list, dec_sen 213 | 214 | def generated_sentence(self, enc_out, attribute, ori_length): 215 | """ 216 | enc_out: (enc_len, batch, emb_dim) 217 | attribute: (batch, 2) 218 | ori_length: token length of the original input sentence 219 | """ 220 | batch = enc_out.shape[1] 221 | # max_len = enc_out.shape[0]+3 222 | max_len = ori_length+5 223 | 224 | # initialization because there is no first token yet 225 | att_emb = self.matrix_A(attribute).unsqueeze(0) # (1, batch, emb_dim) 226 | start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda()) # (emb_dim) 227 | start_token = start_token.repeat(1, batch, 1) # (1, batch, emb_dim) 228 | gen_input = torch.cat([att_emb, start_token], 0) # (2, batch, emb_dim) w/ [att], [start] 229 | 230 | tgt_mask = self.generate_square_subsequent_mask(gen_input.shape[0]).cuda() # (2, 2) 231 | 232 | dec_out = self.transformer_decoder(gen_input, enc_out, tgt_mask=tgt_mask) # (2, batch, emb_dim) 233 | vocab_out = self.matrix_D(dec_out) # (2, batch, n_vocab) 234 | _, dec_sen = self.dec2sen(vocab_out) 235 | 236 | gen_vocab_out = [] 237 | for i in range(max_len): 238 | if len(dec_sen) == 0: 239 | token_idx = torch.tensor([220]).unsqueeze(0).cuda() # (batch, gen_len) 240 | else: 241 | token_idx = torch.tensor(self.gpt_tokenizer.encode(dec_sen)).unsqueeze(0).cuda() # (batch, gen_len) 242 | if self.EOS_IDX in token_idx: 243 | break 244 | 245 | dec_out, vocab_out = self.decoder(enc_out, token_idx, attribute) # (dec_len+2, batch, emb_dim), (dec_len+2, batch, n_vocab) 246 | dec_tokens, dec_sen = self.dec2sen(vocab_out) 247 | 248 | return dec_sen 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 
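Taken together, the pieces above give one full transfer step: delete attribute tokens with the classifier, encode what remains, and decode with the target attribute. A minimal sketch of that flow, assuming trained checkpoints and a GPU; the checkpoint names and the example sentence are illustrative assumptions, and the same delete loop appears in train.py below:

import torch
import torch.nn.functional as F
from transformers import GPT2Tokenizer
from gen_model import styletransfer
from dis_model import findattribute

gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
genmodel = styletransfer().cuda()
genmodel.load_state_dict(torch.load('models/gen_model_final'))  # assumed checkpoint
genmodel.eval()
dismodel = findattribute().cuda()
dismodel.load_state_dict(torch.load('classifier/models/cls_model_final'))  # assumed checkpoint
dismodel.eval()

sentence = "the service was slow and rude ."  # illustrative negative input
token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda()
ori_length = token_idx.shape[1]

# step 1: delete attribute tokens until the source sentiment is no longer confident
sentiment = dismodel.discriminator(token_idx).argmax(1).cpu().item()
del_idx = token_idx
for _ in range(int(ori_length/2)):
    del_idx = dismodel.att_prob(del_idx, sentiment)
    sent_prob = F.softmax(dismodel.discriminator(del_idx), 1).squeeze(0)[sentiment].item()
    if sent_prob < 0.7:
        break

# step 2: encode the content tokens and decode with the opposite attribute ([0,1] = positive)
fake_attribute = torch.tensor([[0., 1.]]).cuda()
enc_out = genmodel.encoder(del_idx)
print(genmodel.generated_sentence(enc_out, fake_attribute, ori_length))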
214 | def generated_sentence(self, enc_out, attribute, ori_length): 215 | """ 216 | enc_out: (enc_len, batch, emb_dim) 217 | attribute: (batch, 2) 218 | ori_length: length of the original token sequence 219 | """ 220 | batch = enc_out.shape[1] 221 | # max_len = enc_out.shape[0]+3 222 | max_len = ori_length+5 223 | 224 | # initialization because there is no first token yet 225 | att_emb = self.matrix_A(attribute).unsqueeze(0) # (1, batch, emb_dim) 226 | start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda()) # (emb_dim) 227 | start_token = start_token.repeat(1, batch, 1) # (1, batch, emb_dim) 228 | gen_input = torch.cat([att_emb, start_token], 0) # (2, batch, emb_dim) w/ [att], [start] 229 | 230 | tgt_mask = self.generate_square_subsequent_mask(gen_input.shape[0]).cuda() # (2, 2) 231 | 232 | dec_out = self.transformer_decoder(gen_input, enc_out, tgt_mask=tgt_mask) # (2, batch, emb_dim) 233 | vocab_out = self.matrix_D(dec_out) # (2, batch, n_vocab) 234 | _, dec_sen = self.dec2sen(vocab_out) 235 | 236 | gen_vocab_out = [] 237 | for i in range(max_len): 238 | if len(dec_sen) == 0: 239 | token_idx = torch.tensor([220]).unsqueeze(0).cuda() # (batch, gen_len); 220 is the bare-space token 240 | else: 241 | token_idx = torch.tensor(self.gpt_tokenizer.encode(dec_sen)).unsqueeze(0).cuda() # (batch, gen_len) 242 | if self.EOS_IDX in token_idx: 243 | break 244 | 245 | dec_out, vocab_out = self.decoder(enc_out, token_idx, attribute) # (dec_len+2, batch, emb_dim), (dec_len+2, batch, n_vocab) 246 | dec_tokens, dec_sen = self.dec2sen(vocab_out) 247 | 248 | return dec_sen 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | -------------------------------------------------------------------------------- /generation_model/amazon/train.py: -------------------------------------------------------------------------------- 1 | import logging 2 | logger = logging.getLogger() 3 | logger.setLevel("ERROR") 4 | 5 | import torch 6 | import torch.nn as nn 7 | import numpy as np 8 | 9 | from tqdm import tqdm 10 | import os 11 | import random 12 | 13 | from transformers import * 14 | gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 15 | from tqdm import tqdm 16 | import json 17 | 18 | 19 | ## initialization 20 | from gen_model import * 21 | genmodel = styletransfer().cuda() 22 | genmodel.load_state_dict(torch.load('../ST_v2.0/models/gen_model_5')) 23 | genmodel.train() 24 | 25 | import sys 26 | sys.path.insert(0, "/DATA/joosung/controllable_english/amazon/classifier/") 27 | from dis_model import * 28 | dismodel = findattribute().cuda() 29 | dismodel_name='cls_model_6' 30 | dismodel.load_state_dict(torch.load('../classifier/models/{}'.format(dismodel_name))) 31 | dismodel.eval() 32 | 33 | import torch.optim as optim 34 | 35 | from tensorboardX import SummaryWriter 36 | summary = SummaryWriter(logdir='./logs') 37 | 38 | def main(): 39 | f = open('amazon_vocab.json') 40 | token2num = json.load(f) 41 | 42 | num2token = {} 43 | for key, value in token2num.items(): 44 | num2token[value] = key 45 | f.close() 46 | 47 | data_path = "/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data" 48 | train_amazon_neg_path = data_path + "/amazon/sentiment.train.0" 49 | train_amazon_neg_open = open(train_amazon_neg_path, "r") 50 | train_amazon_neg_dataset = train_amazon_neg_open.readlines() 51 | dev_amazon_neg_path = data_path + "/amazon/sentiment.dev.0" 52 | dev_amazon_neg_open = open(dev_amazon_neg_path, "r") 53 | dev_amazon_neg_dataset = dev_amazon_neg_open.readlines() 54 | amazon_neg_dataset = train_amazon_neg_dataset+dev_amazon_neg_dataset 55 | 
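The block above folds the dev split into the training pool for each sentiment. A compact, hypothetical helper that is equivalent to this loading boilerplate (the function name is ours; the file layout matches the repo's data directory):

```python
def load_sentences(data_path, domain, label):
    """Read and merge the train/dev splits for one sentiment label (0 or 1)."""
    sentences = []
    for split in ("train", "dev"):
        with open("{}/{}/sentiment.{}.{}".format(data_path, domain, split, label)) as f:
            sentences.extend(line.strip() for line in f)
    return sentences

# amazon_neg_dataset = load_sentences(data_path, "amazon", 0)
# amazon_pos_dataset = load_sentences(data_path, "amazon", 1)
```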
56 | neg_len = len(amazon_neg_dataset) 57 | train_amazon_neg_open.close() 58 | dev_amazon_neg_open.close() 59 | 60 | train_amazon_pos_path = data_path + "/amazon/sentiment.train.1" 61 | train_amazon_pos_open = open(train_amazon_pos_path, "r") 62 | train_amazon_pos_dataset = train_amazon_pos_open.readlines() 63 | dev_amazon_pos_path = data_path + "/amazon/sentiment.dev.1" 64 | dev_amazon_pos_open = open(dev_amazon_pos_path, "r") 65 | dev_amazon_pos_dataset = dev_amazon_pos_open.readlines() 66 | amazon_pos_dataset = train_amazon_pos_dataset+dev_amazon_pos_dataset 67 | 68 | pos_len = len(amazon_pos_dataset) 69 | train_amazon_pos_open.close() 70 | dev_amazon_pos_open.close() 71 | 72 | """training parameter""" 73 | aed_initial_lr = 0.00001 74 | gen_initial_lr = 0.001 75 | aed_trainer = optim.Adamax(genmodel.aed_params, lr=aed_initial_lr) # initial 0.0005 76 | gen_trainer = optim.Adamax(genmodel.aed_params, lr=gen_initial_lr) # initial 0.0001 77 | max_grad_norm = 10 78 | batch = 1 79 | epoch = 6 80 | epoch_len = max(pos_len,neg_len) 81 | stop_point = epoch_len*epoch 82 | 83 | pre_epoch = 0 84 | for start in tqdm(range(0, stop_point)): 85 | ## learning rate decay 86 | now_epoch = (start+1)//pos_len 87 | 88 | """data start point""" 89 | neg_start = start%neg_len 90 | pos_start = start%pos_len 91 | 92 | """data setting""" 93 | neg_sentence = amazon_neg_dataset[neg_start].strip() 94 | pos_sentence = amazon_pos_dataset[pos_start].strip() 95 | 96 | neg_labels = [] # negative labels 97 | neg_labels.append([1,0]) 98 | neg_attribute = torch.from_numpy(np.asarray(neg_labels)).type(torch.FloatTensor).cuda() 99 | 100 | pos_labels = [] # positive labels 101 | pos_labels.append([0,1]) 102 | pos_attribute = torch.from_numpy(np.asarray(pos_labels)).type(torch.FloatTensor).cuda() 103 | 104 | sentences = [neg_sentence, pos_sentence] 105 | attributes = [neg_attribute, pos_attribute] 106 | sentiments = [0, 1] 107 | 108 | """data input""" 109 | for i in range(2): 110 | # i=0: negative, i=1: positive 111 | sentence = sentences[i] 112 | attribute = attributes[i] # for decoder 113 | fake_attribute = attributes[abs(1-i)] # for generate 114 | # sentiment = sentiments[i] # for delete 115 | 116 | token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda() 117 | 118 | # delete model 119 | max_len = int(token_idx.shape[1]/2) 120 | dis_out = dismodel.discriminator(token_idx) 121 | sentiment = dis_out.argmax(1).cpu().item() ## changed: the predicted sentiment drives the delete step 122 | 123 | del_idx = token_idx 124 | for k in range(max_len): 125 | del_idx = dismodel.att_prob(del_idx, sentiment) 126 | dis_out = dismodel.discriminator(del_idx) 127 | sent_prob = F.softmax(dis_out, 1).squeeze(0)[sentiment].cpu().detach().numpy().item() 128 | if sent_prob < 0.7: 129 | break 130 | 131 | """auto-encoder loss & training""" 132 | # training using discriminator loss 133 | enc_out = genmodel.encoder(del_idx) 134 | dec_out, vocab_out = genmodel.decoder(enc_out, token_idx, attribute) 135 | 136 | ## calculation loss 137 | recon_loss = genmodel.recon_loss(token_idx, vocab_out) 138 | summary.add_scalar('reconstruction loss', recon_loss.item(), start) 139 | 140 | aed_trainer.zero_grad() 141 | recon_loss.backward(retain_graph=True) # retain_graph=True 142 | grad_norm = torch.nn.utils.clip_grad_norm_(genmodel.aed_params, max_grad_norm) 143 | aed_trainer.step() 144 | 
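Why `retain_graph=True` on the reconstruction backward: `vocab_out` from the same decoder forward pass is reused just below for the classifier loss, so the autograd graph must survive the first backward call. The pattern in isolation (toy tensors, not the model):

```python
import torch

x = torch.randn(3, requires_grad=True)
y = (x * 2).sum()        # shared forward computation
loss_a = y ** 2
loss_b = -y

loss_a.backward(retain_graph=True)  # keep the graph alive for the second loss
loss_b.backward()                   # the graph may be freed after this pass
```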
145 | """decoder classification loss & training""" 146 | ## calculation loss 147 | gen_cls_out = dismodel.gen_discriminator(vocab_out) 148 | 149 | ## loss w.r.t. the decoder's target attribute 150 | gen_cls_loss = genmodel.cls_loss(attribute, gen_cls_out) 151 | summary.add_scalar('generated sentence loss', gen_cls_loss.item(), start) 152 | 153 | gen_trainer.zero_grad() 154 | gen_cls_loss.backward() # retain_graph=True 155 | grad_norm = torch.nn.utils.clip_grad_norm_(genmodel.aed_params, max_grad_norm) 156 | gen_trainer.step() 157 | 158 | 159 | """saving point""" 160 | if (start+1)%epoch_len == 0: 161 | random.shuffle(amazon_neg_dataset) 162 | random.shuffle(amazon_pos_dataset) 163 | save_model((start+1)//pos_len) 164 | save_model('final') # final_model 165 | 166 | 167 | def save_model(iter): 168 | if not os.path.exists('models/'): 169 | os.makedirs('models/') 170 | torch.save(genmodel.state_dict(), 'models/gen_model_{}'.format(iter)) 171 | 172 | 173 | if __name__ == '__main__': 174 | torch.cuda.empty_cache() 175 | main() 176 | 177 | -------------------------------------------------------------------------------- /generation_model/inference.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import torch\n", 10 | "from transformers import *\n", 11 | "\n", 12 | "gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')\n", 13 | "\n", 14 | "from tqdm import tqdm" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "import json\n", 24 | "f = open('gpt_yelp_vocab.json')\n", 25 | "token2num = json.load(f)\n", 26 | "\n", 27 | "num2token = {}\n", 28 | "for key, value in token2num.items():\n", 29 | " num2token[value] = key" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [ 37 | { 38 | "data": { 39 | "text/plain": [ 40 | "findattribute(\n", 41 | " (word_emb): Embedding(50259, 256, padding_idx=50258)\n", 42 | " (conv2d_2): Conv2d(1, 100, kernel_size=(2, 256), stride=(1, 1))\n", 43 | " (conv2d_3): Conv2d(1, 100, kernel_size=(3, 256), stride=(1, 1))\n", 44 | " (conv2d_4): Conv2d(1, 100, kernel_size=(4, 256), stride=(1, 1))\n", 45 | " (conv2d_5): Conv2d(1, 100, kernel_size=(5, 256), stride=(1, 1))\n", 46 | " (disc_fc): Linear(in_features=400, out_features=2, bias=True)\n", 47 | ")" 48 | ] 49 | }, 50 | "execution_count": 3, 51 | "metadata": {}, 52 | "output_type": "execute_result" 53 | } 54 | ], 55 | "source": [ 56 | "import torch\n", 57 | "import numpy as np\n", 58 | "import torch.nn as nn\n", 59 | "import sys\n", 60 | "\n", 61 | "sys.path.insert(0, \"/DATA/joosung/controllable_english/yelp/classifier/\")\n", 62 | "from dis_model import *\n", 63 | "dismodel = findattribute().cuda()\n", 64 | "dismodel_name='cls_model_3'\n", 65 | "dismodel.load_state_dict(torch.load('../classifier/models/{}'.format(dismodel_name)))\n", 66 | "dismodel.eval()\n", 67 | "\n", 68 | "# from gen_model import *\n", 69 | "# genmodel = styletransfer().cuda()\n", 70 | "# genmodel_name='gen_model_3'\n", 71 | "# genmodel.load_state_dict(torch.load('./models/{}'.format(genmodel_name)))\n", 72 | "# genmodel.eval()\n", 73 | "# print('ok')" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "import tqdm\n", 83 | "data_path = \"/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data\"\n", 84 | "yelp_neg_path = data_path + \"/yelp/sentiment.test.0\"\n", 85 | "yelp_neg_open = open(yelp_neg_path, 
\"r\")\n", 86 | "yelp_neg_dataset = yelp_neg_open.readlines()\n", 87 | "neg_len = len(yelp_neg_dataset)\n", 88 | "yelp_neg_open.close()\n", 89 | "\n", 90 | "yelp_pos_path = data_path + \"/yelp/sentiment.test.1\"\n", 91 | "yelp_pos_open = open(yelp_pos_path, \"r\")\n", 92 | "yelp_pos_dataset = yelp_pos_open.readlines()\n", 93 | "pos_len = len(yelp_pos_dataset)\n", 94 | "yelp_pos_open.close()\n", 95 | "\n", 96 | "stop_point = 30\n", 97 | "# stop_point = pos_len*epoch+batch\n", 98 | "\n", 99 | "PAD_IDX = 50258\n", 100 | "\n", 101 | "for start in range(stop_point-1, stop_point):\n", 102 | " \"\"\"data start point\"\"\"\n", 103 | " neg_start = start%neg_len\n", 104 | " pos_start = start%pos_len\n", 105 | " \n", 106 | " \"\"\"data setting\"\"\"\n", 107 | " neg_sentence = yelp_neg_dataset[neg_start].strip()\n", 108 | " pos_sentence = yelp_pos_dataset[pos_start].strip() \n", 109 | " \n", 110 | " neg_labels = [] # negative labels\n", 111 | " neg_labels.append([1,0])\n", 112 | " neg_attribute = torch.from_numpy(np.asarray(neg_labels)).type(torch.FloatTensor).cuda()\n", 113 | "\n", 114 | " pos_labels = [] # positive labels\n", 115 | " pos_labels.append([0,1])\n", 116 | " pos_attribute = torch.from_numpy(np.asarray(pos_labels)).type(torch.FloatTensor).cuda()\n", 117 | "\n", 118 | " sentences = [neg_sentence, pos_sentence]\n", 119 | " attributes = [neg_attribute, pos_attribute]\n", 120 | " fake_attributes = [pos_attribute, neg_attribute]\n", 121 | " sentiments = [0, 1]\n", 122 | " \"\"\"data input\"\"\"\n", 123 | " for i in range(2):\n", 124 | " # k=0: negative, k=1: positive\n", 125 | " sentence = sentences[i] \n", 126 | " for k in range(6):\n", 127 | " fake_attribute = k/5*attributes[0] + (1-k/5)*attributes[1] \n", 128 | "# attribute = attributes[i] # for decoder\n", 129 | "# fake_attribute = attributes[abs(1-i)] # for generate \n", 130 | "\n", 131 | " token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda()\n", 132 | " ori_length = token_idx.shape[1]\n", 133 | "\n", 134 | " # delete model\n", 135 | " max_len = int(token_idx.shape[1]/2)\n", 136 | " sentiment = sentiments[i] # for delete\n", 137 | "# sentiment = dis_out.argmax(1).cpu().item() ## 변경점 for delete\n", 138 | "# dis_out = dismodel.discriminator(token_idx) \n", 139 | "\n", 140 | " del_idx = token_idx\n", 141 | " for k in range(max_len):\n", 142 | " del_idx = dismodel.att_prob(del_idx, sentiment) \n", 143 | " dis_out = dismodel.discriminator(del_idx) \n", 144 | " sent_porb = F.softmax(dis_out, 1).squeeze(0)[sentiment].cpu().detach().numpy().item()\n", 145 | " if sent_porb < 0.7:\n", 146 | " break \n", 147 | "\n", 148 | " del_list = del_idx.squeeze(0).cpu().tolist() # list\n", 149 | " del_sen =''\n", 150 | " for x in range(len(del_list)): \n", 151 | " token = num2token[del_list[x]].strip('Ġ')\n", 152 | " del_sen += token\n", 153 | " del_sen += ' '\n", 154 | " del_sen = del_sen.strip()\n", 155 | "\n", 156 | " del_percent = 100-(del_idx.shape[1])/(token_idx.shape[1]) * 100\n", 157 | "\n", 158 | " enc_out = genmodel.encoder(del_idx)\n", 159 | "# dec_out, vocab_out = genmodel.decoder(enc_out, token_idx, attribute)\n", 160 | "\n", 161 | "# dec_tokens, dec_sen = genmodel.dec2sen(vocab_out)\n", 162 | "\n", 163 | "# gen_sen_1 = genmodel.generated_sentence(enc_out, attribute, ori_length)\n", 164 | " gen_sen_2 = genmodel.generated_sentence(enc_out, fake_attribute, ori_length)\n", 165 | "\n", 166 | " print('Original Attribute: ', sentiment)\n", 167 | " print('Original Sentence: ', sentence)\n", 168 | " print('Delete Sentence: {}, 
{}%'.format(del_sen, del_percent))\n", 169 | "# print('Reconstruction(decoder) Sentence: ', dec_sen)\n", 170 | "# print('Reconstruction(generator) Sentence', sentiment, ': ', gen_sen_1.rstrip('<|endoftext|>')) \n", 171 | "# print('Style transfer(generator) Sentence', abs(1-sentiment), ': ', gen_sen_2.rstrip('<|endoftext|>'))\n", 172 | " print('Style transfer(generator) Sentence', fake_attribute.cpu().numpy().tolist()[0], ': ', gen_sen_2.rstrip('<|endoftext|>')) \n", 173 | " print('') \n", 174 | " \n", 175 | " \n" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 5, 181 | "metadata": {}, 182 | "outputs": [ 183 | { 184 | "name": "stderr", 185 | "output_type": "stream", 186 | "text": [ 187 | "\n", 188 | " 0%| | 0/2 [00:00', '')\n", 321 | "\n", 322 | " if i == 0:\n", 323 | " f0.write(sentence+'\\t'+gen_sen_2+'\\t'+str(sentiment)+'\\n')\n", 324 | " if i == 1:\n", 325 | " f1.write(sentence+'\\t'+gen_sen_2+'\\t'+str(sentiment)+'\\n')\n", 326 | " f0.close()\n", 327 | " f1.close()\n", 328 | " \n", 329 | " \n" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "## specific model test data\n", 339 | "import torch\n", 340 | "import numpy as np\n", 341 | "import torch.nn as nn\n", 342 | "import sys\n", 343 | "\n", 344 | "sys.path.insert(0, \"/DATA/joosung/controllable_english/classifier/\")\n", 345 | "from dis_model import *\n", 346 | "dismodel = findattribute().cuda()\n", 347 | "dismodel_name='cls_model_3'\n", 348 | "dismodel.load_state_dict(torch.load('../classifier/models/{}'.format(dismodel_name)))\n", 349 | "dismodel.eval()\n", 350 | "\n", 351 | "from tqdm import tqdm\n", 352 | "from gen_model import *\n", 353 | "genmodel = styletransfer().cuda()\n", 354 | "\n", 355 | "data_path = \"/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data\"\n", 356 | "yelp_neg_path = data_path + \"/yelp/sentiment.test.0\"\n", 357 | "yelp_neg_open = open(yelp_neg_path, \"r\")\n", 358 | "yelp_neg_dataset = yelp_neg_open.readlines()\n", 359 | "neg_len = len(yelp_neg_dataset)\n", 360 | "yelp_neg_open.close()\n", 361 | "\n", 362 | "yelp_pos_path = data_path + \"/yelp/sentiment.test.1\"\n", 363 | "yelp_pos_open = open(yelp_pos_path, \"r\")\n", 364 | "yelp_pos_dataset = yelp_pos_open.readlines()\n", 365 | "pos_len = len(yelp_pos_dataset)\n", 366 | "yelp_pos_open.close()\n", 367 | "\n", 368 | "stop_point = pos_len\n", 369 | "\n", 370 | "PAD_IDX = 50258\n", 371 | "\n", 372 | "name_list = [1,2,3,4,5,6]\n", 373 | "for name in tqdm(range(len(name_list))):\n", 374 | " genmodel_name='gen_model_' + str(name_list[name])\n", 375 | " genmodel.load_state_dict(torch.load('./models/{}'.format(genmodel_name)))\n", 376 | " genmodel.eval()\n", 377 | " model0 = 'sentiment.test.0.' + 'joo' + str(name_list[name])\n", 378 | " model1 = 'sentiment.test.1.' 
+ 'joo' + str(name_list[name])\n", 379 | " f0 = open(model0, 'w')\n", 380 | " f1 = open(model1, 'w')\n", 381 | "\n", 382 | " for start in range(stop_point):\n", 383 | " \"\"\"data start point\"\"\"\n", 384 | " neg_start = start\n", 385 | " pos_start = start\n", 386 | "\n", 387 | " \"\"\"data setting\"\"\"\n", 388 | " neg_sentence = yelp_neg_dataset[neg_start].strip()\n", 389 | " pos_sentence = yelp_pos_dataset[pos_start].strip() \n", 390 | "\n", 391 | " neg_labels = [] # negative labels\n", 392 | " neg_labels.append([1,0])\n", 393 | " neg_attribute = torch.from_numpy(np.asarray(neg_labels)).type(torch.FloatTensor).cuda()\n", 394 | "\n", 395 | " pos_labels = [] # positive labels\n", 396 | " pos_labels.append([0,1])\n", 397 | " pos_attribute = torch.from_numpy(np.asarray(pos_labels)).type(torch.FloatTensor).cuda()\n", 398 | "\n", 399 | " sentences = [neg_sentence, pos_sentence]\n", 400 | " attributes = [neg_attribute, pos_attribute]\n", 401 | " fake_attributes = [pos_attribute, neg_attribute]\n", 402 | " sentiments = [0, 1]\n", 403 | " \"\"\"data input\"\"\"\n", 404 | " for i in range(2):\n", 405 | " # k=0: negative, k=1: positive\n", 406 | " sentence = sentences[i]\n", 407 | " attribute = attributes[i] # for decoder\n", 408 | " fake_attribute = attributes[abs(1-i)] # for generate\n", 409 | " sentiment = sentiments[i] # for delete\n", 410 | "\n", 411 | " token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda()\n", 412 | " ori_length = token_idx.shape[1]\n", 413 | "\n", 414 | " # delete model\n", 415 | " max_len = int(token_idx.shape[1]/10*4) # 60%\n", 416 | " \n", 417 | "\n", 418 | " dis_out = dismodel.discriminator(token_idx) \n", 419 | "# sentiment = dis_out.argmax(1).cpu().item() ## 변경점 for delete\n", 420 | "\n", 421 | " del_idx = token_idx\n", 422 | " for k in range(max_len):\n", 423 | " del_idx = dismodel.att_prob(del_idx, sentiment) \n", 424 | " dis_out = dismodel.discriminator(del_idx) \n", 425 | " sent_porb = F.softmax(dis_out, 1).squeeze(0)[sentiment].cpu().detach().numpy().item()\n", 426 | " if sent_porb < 0.6: # 0.7\n", 427 | " break \n", 428 | "\n", 429 | " del_list = del_idx.squeeze(0).cpu().tolist() # list\n", 430 | " del_sen =''\n", 431 | " for x in range(len(del_list)): \n", 432 | " token = num2token[del_list[x]].strip('Ġ')\n", 433 | " del_sen += token\n", 434 | " del_sen += ' '\n", 435 | " del_sen = del_sen.strip()\n", 436 | "\n", 437 | " del_percent = 100-(del_idx.shape[1])/(token_idx.shape[1]) * 100\n", 438 | "\n", 439 | " enc_out = genmodel.encoder(del_idx)\n", 440 | " dec_out, vocab_out = genmodel.decoder(enc_out, token_idx, attribute)\n", 441 | "\n", 442 | " dec_tokens, dec_sen = genmodel.dec2sen(vocab_out)\n", 443 | "\n", 444 | " gen_sen_2 = genmodel.generated_sentence(enc_out, fake_attribute, ori_length).replace('<|endoftext|>', '')\n", 445 | "\n", 446 | " if i == 0:\n", 447 | " f0.write(sentence+'\\t'+gen_sen_2+'\\t'+str(sentiment)+'\\n')\n", 448 | " if i == 1:\n", 449 | " f1.write(sentence+'\\t'+gen_sen_2+'\\t'+str(sentiment)+'\\n')\n", 450 | " f0.close()\n", 451 | " f1.close()\n", 452 | "\n", 453 | " \n" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [] 462 | } 463 | ], 464 | "metadata": { 465 | "kernelspec": { 466 | "display_name": "Python 3", 467 | "language": "python", 468 | "name": "python3" 469 | }, 470 | "language_info": { 471 | "codemirror_mode": { 472 | "name": "ipython", 473 | "version": 3 474 | }, 475 | "file_extension": ".py", 476 | 
"mimetype": "text/x-python", 477 | "name": "python", 478 | "nbconvert_exporter": "python", 479 | "pygments_lexer": "ipython3", 480 | "version": "3.6.9" 481 | } 482 | }, 483 | "nbformat": 4, 484 | "nbformat_minor": 4 485 | } 486 | -------------------------------------------------------------------------------- /generation_model/yelp/.ipynb_checkpoints/gen_model-checkpoint.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import sys 6 | import math 7 | from transformers import * 8 | 9 | sys.path.insert(0, "/DATA/joosung/fairseq_master") 10 | 11 | import json 12 | f = open('gpt_yelp_vocab.json') 13 | token2num = json.load(f) 14 | 15 | num2token = {} 16 | for key, value in token2num.items(): 17 | num2token[value] = key 18 | 19 | class PositionalEncoding(nn.Module): 20 | def __init__(self, d_model, dropout=0.1, max_len=5000): 21 | super(PositionalEncoding, self).__init__() 22 | self.dropout = nn.Dropout(p=dropout) 23 | 24 | pe = torch.zeros(max_len, d_model) 25 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 26 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 27 | pe[:, 0::2] = torch.sin(position * div_term) 28 | pe[:, 1::2] = torch.cos(position * div_term) 29 | pe = pe.unsqueeze(0).transpose(0, 1) 30 | self.register_buffer('pe', pe) 31 | 32 | def forward(self, x): 33 | x = x + self.pe[:x.size(0), :] 34 | return self.dropout(x) 35 | 36 | class styletransfer(nn.Module): 37 | def __init__(self, drop_rate=0, gpu = True): 38 | super(styletransfer, self).__init__() 39 | self.gpu = gpu 40 | self.gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 41 | 42 | """hyper parameters""" 43 | self.n_vocab = 50259 44 | self.emb_dim = 256 45 | self.nhead = 4 46 | self.num_layers = 3 47 | 48 | """idx & length""" 49 | self.START_IDX = 50257 50 | self.PAD_IDX = 50258 51 | self.EOS_IDX = 50256 52 | self.MAX_SENT_LEN = 10 53 | 54 | """attribute matrix""" 55 | ## one_hot encoding 56 | self.att_num = 2 57 | self.matrix_A = nn.Linear(self.att_num, self.emb_dim) 58 | 59 | """word embedding""" 60 | self.emb_matrix = nn.Embedding(self.n_vocab, self.emb_dim, self.PAD_IDX) # 50259x1024 61 | 62 | """Position embedding""" 63 | self.pos_encoder = PositionalEncoding(self.emb_dim) 64 | 65 | """Encoder""" 66 | self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.emb_dim, nhead=self.nhead) 67 | self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=self.num_layers) 68 | 69 | """Decoder""" 70 | self.decoder_layer = nn.TransformerDecoderLayer(d_model=self.emb_dim, nhead=self.nhead) 71 | self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=self.num_layers) 72 | self.matrix_D = nn.Linear(self.emb_dim, self.n_vocab) # emb_dim -> n_vocab 73 | 74 | """parameters""" 75 | self.enc_params = list(self.encoder_layer.parameters())+list(self.transformer_encoder.parameters()) 76 | self.dec_params = list(self.decoder_layer.parameters())+list(self.transformer_decoder.parameters())+list(self.matrix_D.parameters()) 77 | self.aed_params = list(self.emb_matrix.parameters())+self.enc_params+self.dec_params 78 | 79 | """Modeling""" 80 | def encoder(self, enc_input): 81 | """ 82 | enc_input: (batch, enc_len) 83 | """ 84 | word_emb = self.emb_matrix(enc_input) # (batch, enc_len, emb_dim) 85 | word_emb = word_emb.transpose(0, 1) # (enc_len, batch, emb_dim) 86 | word_pos = 
-------------------------------------------------------------------------------- /generation_model/yelp/classifier/dis_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import sys 6 | 7 | sys.path.insert(0, 
"/DATA/joosung/fairseq_master") 8 | 9 | class findattribute(nn.Module): 10 | def __init__(self, drop_rate=0, gpu = True): 11 | super(findattribute, self).__init__() 12 | self.gpu = gpu 13 | 14 | self.n_vocab = 50259 15 | self.emb_dim = 256 16 | 17 | """idx & length""" 18 | self.START_IDX = 50257 19 | self.PAD_IDX = 50258 20 | self.EOS_IDX = 50256 21 | 22 | """Discriminator(classifier)""" 23 | self.word_dim = 256 24 | self.word_emb = nn.Embedding(self.n_vocab, self.word_dim, self.PAD_IDX) # 50265x1024 25 | 26 | self.channel_out = 100 27 | self.conv2d_2 = nn.Conv2d(1,self.channel_out,(2,self.word_dim)) 28 | self.conv2d_3 = nn.Conv2d(1,self.channel_out,(3,self.word_dim)) 29 | self.conv2d_4 = nn.Conv2d(1,self.channel_out,(4,self.word_dim)) 30 | self.conv2d_5 = nn.Conv2d(1,self.channel_out,(5,self.word_dim)) 31 | # self.fc_drop = nn.Dropout(drop_rate) 32 | self.disc_fc = nn.Linear(4*self.channel_out, 2) 33 | 34 | """parameters""" 35 | self.cls_params = list(self.word_emb.parameters())+list(self.conv2d_2.parameters())+list(self.conv2d_3.parameters())+list(self.conv2d_4.parameters())+\ 36 | list(self.conv2d_5.parameters())+list(self.disc_fc.parameters()) 37 | 38 | 39 | def discriminator(self, token_idx): 40 | """ 41 | token_idx: (batch, seq_len) 42 | """ 43 | if token_idx.shape[1] < 5: 44 | padding_size = 5-token_idx.shape[1] 45 | padding_token = [] 46 | for k in range(token_idx.shape[0]): 47 | temp = [] 48 | for i in range(padding_size): 49 | temp.append(self.PAD_IDX) 50 | padding_token.append(temp) 51 | padding_token=torch.from_numpy(np.array(padding_token)) 52 | if self.gpu == True: 53 | padding_token = padding_token.cuda() 54 | token_idx=torch.cat([token_idx,padding_token], 1) # (batch, seq_len+padding) = (batch, 5) 55 | 56 | word_emb = self.word_emb(token_idx) # (batch, seq_len, word_dim) 57 | word_2d = word_emb.unsqueeze(1) # (batch, 1, seq_len, word_dim) 58 | 59 | x2 = F.relu(self.conv2d_2(word_2d)).squeeze(3) # bi-gram, (batch, channel_out, seq_len-1) 60 | x3 = F.relu(self.conv2d_3(word_2d)).squeeze(3) # 3-gram, (batch, channel_out, seq_len-2) 61 | x4 = F.relu(self.conv2d_4(word_2d)).squeeze(3) # 4-gram, (batch, channel_out, seq_len-3) 62 | x5 = F.relu(self.conv2d_5(word_2d)).squeeze(3) # 5-gram, (batch, channel_out, seq_len-4) 63 | 64 | # Max-over-time-pool 65 | x2 = F.max_pool1d(x2, x2.size(2)).squeeze(2) # (batch, channel_out) 66 | x3 = F.max_pool1d(x3, x3.size(2)).squeeze(2) # (batch, channel_out) 67 | x4 = F.max_pool1d(x4, x4.size(2)).squeeze(2) # (batch, channel_out) 68 | x5 = F.max_pool1d(x5, x5.size(2)).squeeze(2) # (batch, channel_out) 69 | x = torch.cat([x2, x3, x4, x5], dim=1) # (batch, channel_out*4) 70 | 71 | y = self.disc_fc(x) # (batch, 2) 72 | 73 | if self.gpu == True: 74 | return y.cuda() 75 | else: 76 | return y 77 | 78 | def gen_discriminator(self, gen_out): 79 | """ 80 | gen_out: (gen_len+2, batch, n_vocab) 81 | """ 82 | gen_emb = gen_out[1:-1,:,:] # (gen_len, batch, n_vocab) 83 | gen_emb = torch.bmm(gen_emb, self.word_emb.weight.repeat(gen_emb.shape[0],1,1)) 84 | # (gen_len, batch, emb_dim) = (gen_len, batch, n_vocab) x (gen_len, n_vocab, emb_dim) 85 | gen_emb = gen_emb.transpose(0, 1) # (batch, gen_len, word_dim) 86 | 87 | if gen_emb.shape[1] < 5: 88 | padding_size = 5-gen_emb.shape[1] 89 | padding_token = [] 90 | for k in range(gen_emb.shape[0]): 91 | temp = [] 92 | for i in range(padding_size): 93 | temp.append(self.PAD_IDX) 94 | padding_token.append(temp) 95 | padding_token=torch.from_numpy(np.array(padding_token)) # (batch, padding_len) 96 | if self.gpu == True: 
97 | padding_token = padding_token.cuda() 98 | padding_emb = self.word_emb(padding_token) # (batch, padding_len, emb_dim) 99 | gen_emb = torch.cat([gen_emb, padding_emb], 1) # (batch, 5, emb_dim) 100 | 101 | word_2d = gen_emb.unsqueeze(1) # (batch, 1, seq_len, word_dim) 102 | 103 | x2 = F.relu(self.conv2d_2(word_2d)).squeeze(3) # bi-gram, (batch, channel_out, seq_len-1) 104 | x3 = F.relu(self.conv2d_3(word_2d)).squeeze(3) # 3-gram, (batch, channel_out, seq_len-2) 105 | x4 = F.relu(self.conv2d_4(word_2d)).squeeze(3) # 4-gram, (batch, channel_out, seq_len-3) 106 | x5 = F.relu(self.conv2d_5(word_2d)).squeeze(3) # 5-gram, (batch, channel_out, seq_len-4) 107 | 108 | # Max-over-time-pool 109 | x2 = F.max_pool1d(x2, x2.size(2)).squeeze(2) # (batch, channel_out) 110 | x3 = F.max_pool1d(x3, x3.size(2)).squeeze(2) # (batch, channel_out) 111 | x4 = F.max_pool1d(x4, x4.size(2)).squeeze(2) # (batch, channel_out) 112 | x5 = F.max_pool1d(x5, x5.size(2)).squeeze(2) # (batch, channel_out) 113 | x = torch.cat([x2, x3, x4, x5], dim=1) # (batch, channel_out*4) 114 | 115 | y = self.disc_fc(x) # (batch, 2) 116 | 117 | if self.gpu == True: 118 | return y.cuda() 119 | else: 120 | return y 121 | 122 | def att_prob(self, token_idx, sentiment): 123 | """ 124 | token_idx: (batch, seq_len) 125 | """ 126 | # if token_idx.size(1) < 5: 127 | # padding_size = 5-token_idx.size(1) 128 | # padding_token = [] 129 | # for k in range(token_idx.size(0)): 130 | # temp = [] 131 | # for i in range(padding_size): 132 | # temp.append(self.PAD_IDX) 133 | # padding_token.append(temp) 134 | # padding_token=torch.from_numpy(np.array(padding_token)) 135 | # if self.gpu == True: 136 | # padding_token = padding_token.cuda() 137 | # token_idx=torch.cat([token_idx,padding_token], 1) # (batch, seq_len+padding) = (batch, 5) 138 | token_list = token_idx.squeeze(0).cpu().tolist() # list 139 | min_prob = 1 140 | for i in range(len(token_list)): 141 | del_list = token_list[:i] + token_list[i+1:] 142 | del_tensor = torch.from_numpy(np.asarray(del_list)).unsqueeze(0).cuda() 143 | del_prob = F.softmax(self.discriminator(del_tensor), 1).squeeze(0)[sentiment].cpu().detach().numpy().item() 144 | 145 | if del_prob <= min_prob: 146 | max_ind = i 147 | min_prob = del_prob 148 | 149 | final_list = token_list[:max_ind] + token_list[max_ind+1:] 150 | del_idx = torch.from_numpy(np.asarray(final_list)).unsqueeze(0).cuda() 151 | return del_idx 152 | 153 | def cls_loss(self, targets, cls_out): 154 | """ 155 | targets: (batch, 2) / attributes [0,1] or [1,0] 156 | cls_out: (batch, 2) (logits) 157 | """ 158 | 159 | final_targets = targets.argmax(1) # (batch) 160 | cls_loss = F.cross_entropy(cls_out, final_targets) 161 | 162 | if self.gpu == True: 163 | return cls_loss.cuda() 164 | else: 165 | return cls_loss 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 
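Together, `att_prob` and `discriminator` implement the delete step used by the training and inference scripts: repeatedly drop whichever single token most lowers the classifier's confidence in the original sentiment, until that confidence falls below a threshold. A condensed sketch of the calling pattern (the wrapper name is ours; 0.7 is the threshold the training scripts use):

```python
import torch.nn.functional as F

def delete_attribute_tokens(dismodel, token_idx, sentiment, threshold=0.7):
    """Greedy leave-one-out deletion until the sentiment probability drops."""
    del_idx = token_idx
    for _ in range(token_idx.shape[1] // 2):          # delete at most half the tokens
        del_idx = dismodel.att_prob(del_idx, sentiment)
        prob = F.softmax(dismodel.discriminator(del_idx), 1)[0, sentiment].item()
        if prob < threshold:
            break
    return del_idx
```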
-------------------------------------------------------------------------------- /generation_model/yelp/classifier/train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | from tqdm import tqdm 6 | import os 7 | import random 8 | 9 | from transformers import * 10 | gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 11 | from tqdm import tqdm 12 | import json 13 | 14 | 15 | ## initialization 16 | from dis_model import * 17 | dismodel = findattribute().cuda() 18 | dismodel.train() 19 | 20 | import torch.optim as optim 21 | 22 | from tensorboardX import SummaryWriter 23 | summary = SummaryWriter(logdir='./logs') 24 | 25 | def main(): 26 | f = open('../gpt_yelp_vocab.json') 27 | token2num = json.load(f) 28 | 29 | num2token = {} 30 | for key, value in token2num.items(): 31 | num2token[value] = key 32 | f.close() 33 | 34 | data_path = "/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data" # customize data path 35 | yelp_neg_path = data_path + "/yelp/sentiment.train.0" 36 | yelp_neg_open = open(yelp_neg_path, "r") 37 | yelp_neg_dataset = yelp_neg_open.readlines() 38 | neg_len = len(yelp_neg_dataset) 39 | yelp_neg_open.close() 40 | 41 | yelp_pos_path = data_path + "/yelp/sentiment.train.1" 42 | yelp_pos_open = open(yelp_pos_path, "r") 43 | yelp_pos_dataset = yelp_pos_open.readlines() 44 | pos_len = len(yelp_pos_dataset) 45 | yelp_pos_open.close() 46 | 47 | """training parameter""" 48 | cls_initial_lr = 0.001 49 | cls_trainer = optim.Adamax(dismodel.cls_params, lr=cls_initial_lr) # initial 0.001 50 | max_grad_norm = 25 51 | batch = 1 52 | epoch = 5 53 | stop_point = pos_len*epoch 54 | 55 | pre_epoch = 0 56 | for start in tqdm(range(0, stop_point)): 57 | ## learning rate decay 58 | now_epoch = (start+1)//pos_len 59 | if now_epoch == 4: 60 | cls_initial_lr = cls_initial_lr/2 61 | cls_trainer = optim.Adamax(dismodel.cls_params, lr=cls_initial_lr) # initial 0.001 62 | 63 | """data start point""" 64 | neg_start = start%neg_len 65 | pos_start = start%pos_len 66 | 67 | """data setting""" 68 | neg_sentence = yelp_neg_dataset[neg_start].strip() 69 | pos_sentence = yelp_pos_dataset[pos_start].strip() 70 | 71 | neg_labels = [] # negative labels 72 | neg_labels.append([1,0]) 73 | neg_attribute = torch.from_numpy(np.asarray(neg_labels)).type(torch.FloatTensor).cuda() 74 | 75 | pos_labels = [] # positive labels 76 | pos_labels.append([0,1]) 77 | pos_attribute = torch.from_numpy(np.asarray(pos_labels)).type(torch.FloatTensor).cuda() 78 | 79 | sentences = [neg_sentence, pos_sentence] 80 | attributes = [neg_attribute, pos_attribute] 81 | 82 | """data input""" 83 | for i in range(2): 84 | # i=0: negative, i=1: positive 85 | sentence = sentences[i] 86 | attribute = attributes[i] # for generate 87 | 88 | token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda() 89 | 90 | dis_out = dismodel.discriminator(token_idx) 91 | 92 | """calculation loss & training""" 93 | # training using discriminator loss 94 | cls_loss = dismodel.cls_loss(attribute, dis_out) 95 | summary.add_scalar('discriminator loss', cls_loss.item(), start) 96 | 97 | cls_trainer.zero_grad() 98 | cls_loss.backward() # retain_graph=True 99 | grad_norm = torch.nn.utils.clip_grad_norm_(dismodel.cls_params, max_grad_norm) 100 | cls_trainer.step() 101 | 102 | """saving point""" 103 | if (start+1)%pos_len == 0: 104 | random.shuffle(yelp_neg_dataset) 105 | random.shuffle(yelp_pos_dataset) 106 | save_model((start+1)//pos_len) 107 | save_model('final') # final_model 108 | 109 | 110 | def save_model(iter): 111 | if not os.path.exists('models/'): 112 | os.makedirs('models/') 113 | torch.save(dismodel.state_dict(), 'models/cls_model_{}'.format(iter)) 114 | 115 | 116 | if __name__ == '__main__': 117 | torch.cuda.empty_cache() 118 | main() 119 | 120 | 
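One caveat in the loop above: the `now_epoch == 4` branch halves `cls_initial_lr` and rebuilds the optimizer on every iteration of that epoch, so the rate keeps shrinking step after step rather than being halved once. If a single halving at the epoch boundary is the intent, a guard over the otherwise-unused `pre_epoch` variable expresses it (a sketch using the script's own names, not a drop-in patch):

```python
# halve the learning rate once, when epoch 4 begins (sketch)
if now_epoch == 4 and pre_epoch != now_epoch:
    cls_initial_lr = cls_initial_lr / 2
    cls_trainer = optim.Adamax(dismodel.cls_params, lr=cls_initial_lr)
pre_epoch = now_epoch
```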
"/DATA/joosung/fairseq_master") 10 | 11 | import json 12 | f = open('gpt_yelp_vocab.json') 13 | token2num = json.load(f) 14 | 15 | num2token = {} 16 | for key, value in token2num.items(): 17 | num2token[value] = key 18 | 19 | class PositionalEncoding(nn.Module): 20 | def __init__(self, d_model, dropout=0.1, max_len=5000): 21 | super(PositionalEncoding, self).__init__() 22 | self.dropout = nn.Dropout(p=dropout) 23 | 24 | pe = torch.zeros(max_len, d_model) 25 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 26 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 27 | pe[:, 0::2] = torch.sin(position * div_term) 28 | pe[:, 1::2] = torch.cos(position * div_term) 29 | pe = pe.unsqueeze(0).transpose(0, 1) 30 | self.register_buffer('pe', pe) 31 | 32 | def forward(self, x): 33 | x = x + self.pe[:x.size(0), :] 34 | return self.dropout(x) 35 | 36 | class styletransfer(nn.Module): 37 | def __init__(self, drop_rate=0, gpu = True): 38 | super(styletransfer, self).__init__() 39 | self.gpu = gpu 40 | self.gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2') 41 | 42 | """hyper parameters""" 43 | self.n_vocab = 50259 44 | self.emb_dim = 256 45 | self.nhead = 4 46 | self.num_layers = 3 47 | 48 | """idx & length""" 49 | self.START_IDX = 50257 50 | self.PAD_IDX = 50258 51 | self.EOS_IDX = 50256 52 | self.MAX_SENT_LEN = 10 53 | 54 | """attribute matrix""" 55 | ## one_hot encoding 56 | self.att_num = 2 57 | self.matrix_A = nn.Linear(self.att_num, self.emb_dim) 58 | 59 | """word embedding""" 60 | self.emb_matrix = nn.Embedding(self.n_vocab, self.emb_dim, self.PAD_IDX) # 50259x1024 61 | 62 | """Position embedding""" 63 | self.pos_encoder = PositionalEncoding(self.emb_dim) 64 | 65 | """Encoder""" 66 | self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.emb_dim, nhead=self.nhead) 67 | self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=self.num_layers) 68 | 69 | """Decoder""" 70 | self.decoder_layer = nn.TransformerDecoderLayer(d_model=self.emb_dim, nhead=self.nhead) 71 | self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=self.num_layers) 72 | self.matrix_D = nn.Linear(self.emb_dim, self.n_vocab) # emb_dim -> n_vocab 73 | 74 | """parameters""" 75 | self.enc_params = list(self.encoder_layer.parameters())+list(self.transformer_encoder.parameters()) 76 | self.dec_params = list(self.decoder_layer.parameters())+list(self.transformer_decoder.parameters())+list(self.matrix_D.parameters()) 77 | self.aed_params = list(self.emb_matrix.parameters())+self.enc_params+self.dec_params 78 | 79 | """Modeling""" 80 | def encoder(self, enc_input): 81 | """ 82 | enc_input: (batch, enc_len) 83 | """ 84 | word_emb = self.emb_matrix(enc_input) # (batch, enc_len, emb_dim) 85 | word_emb = word_emb.transpose(0, 1) # (enc_len, batch, emb_dim) 86 | word_pos = self.pos_encoder(word_emb) # (enc_len, batch, emb_dim) 87 | out_enc = self.transformer_encoder(word_pos) # (enc_len, batch, emb_dim) 88 | 89 | return out_enc 90 | 91 | def decoder(self, enc_out, dec_input, attribute): 92 | """ 93 | enc_out: (enc_len, batch, emb_dim) 94 | dec_input: (batch, dec_len) 95 | attributes: (batch, 2) 96 | """ 97 | att_emb = self.matrix_A(attribute).unsqueeze(0) # (1. 
        word_emb = self.emb_matrix(dec_input)  # (batch, dec_len, emb_dim)
        word_emb = word_emb.transpose(0, 1)  # (dec_len, batch, emb_dim)
        word_pos = self.pos_encoder(word_emb)  # (dec_len, batch, emb_dim)

        start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda())  # (emb_dim)
        start_token = start_token.repeat(1, dec_input.shape[0], 1)  # (1, batch, emb_dim)
        style_dec_input = torch.cat([att_emb, start_token, word_pos], 0)  # (dec_len+2, batch, emb_dim) w/ [att], [start]

        tgt_mask = self.generate_square_subsequent_mask(style_dec_input.shape[0]).cuda()  # (dec_len+2, dec_len+2)

        dec_out = self.transformer_decoder(style_dec_input, enc_out, tgt_mask=tgt_mask)  # (dec_len+2, batch, emb_dim)
        vocab_out = self.matrix_D(dec_out)  # (dec_len+2, batch, n_vocab)
        return dec_out, vocab_out

    def generator(self, enc_out, gen_len, attribute):
        """
        enc_out: (enc_len, batch, emb_dim)
        attribute: (batch, 2)
        gen_len: len(dec_in)+1
        """
        # initialize with [att] and [start] because there is no first token yet
        batch = enc_out.shape[1]
        att_emb = self.matrix_A(attribute).unsqueeze(0)  # (1, batch, emb_dim)
        start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda())  # (emb_dim)
        start_token = start_token.repeat(1, batch, 1)  # (1, batch, emb_dim)
        gen_input = torch.cat([att_emb, start_token], 0)  # (2, batch, emb_dim) w/ [att], [start]

        for i in range(gen_len):
            tgt_mask = self.generate_square_subsequent_mask(gen_input.shape[0]).cuda()  # (pre_gen_len, pre_gen_len)
            dec_out = self.transformer_decoder(gen_input, enc_out, tgt_mask=tgt_mask)  # (pre_gen_len, batch, emb_dim)
            vocab_out = self.matrix_D(dec_out)  # (pre_gen_len, batch, n_vocab)

            vocab_idx = vocab_out.argmax(2)  # (pre_gen_len, batch)
            vocab_idx = vocab_idx.transpose(0, 1)  # (batch, pre_gen_len)

            new_word_emb = self.emb_matrix(vocab_idx)  # (batch, pre_gen_len, emb_dim)
            new_word_emb = new_word_emb.transpose(0, 1)  # (pre_gen_len, batch, emb_dim)

            # append only the newest predicted embedding
            gen_input = torch.cat([gen_input, new_word_emb[-1:,:,:]])  # (pre_gen_len+1, batch, emb_dim)

        return vocab_out  # (gen_len+1, batch, n_vocab)

    def generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    """loss calculation"""
    def recon_loss(self, dec_input, vocab_out):
        """
        dec_input: (batch, dec_len)
        vocab_out: (dec_len+2, batch, n_vocab) with [att], [start]
        """
        end_token = torch.tensor(self.EOS_IDX).cuda()  # (1)
        end_token = end_token.repeat(dec_input.shape[0], 1)  # (batch, 1)
        target_tokens = torch.cat([dec_input, end_token], 1)  # (batch, dec_len+1) w/ [EOS]

        pred_out = vocab_out[1:,:,:]  # (dec_len+1, batch, n_vocab)
        pred_out = pred_out.permute(1,0,2)  # (batch, dec_len+1, n_vocab)

        target_tokens = target_tokens.contiguous()  # (batch, dec_len+1)
        pred_out = pred_out.contiguous()  # (batch, dec_len+1, n_vocab)

        target_tokens = target_tokens.view(-1)  # (batch*(dec_len+1))
        pred_out = pred_out.view(-1, pred_out.shape[2])  # (batch*(dec_len+1), n_vocab)
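        # alignment note: vocab_out[0] was predicted at the [att] position and
        # is dropped by the [1:] slice above; the remaining dec_len+1 positions
        # ([start] plus the shifted inputs) line up with dec_input + [EOS]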
        recon_loss = F.cross_entropy(pred_out, target_tokens)

        return recon_loss

    def cls_loss(self, attributes, cls_out):
        """
        attributes: (batch, 2), [0,1] or [1,0]
        cls_out: (batch, 2) logits
        """
        targets = attributes.argmax(1)  # (batch)
        cls_loss = F.cross_entropy(cls_out, targets)

        if self.gpu:
            return cls_loss.cuda()
        return cls_loss

    """inference"""
    def dec2sen(self, vocab_out):
        """
        vocab_out: (dec_len+2, batch, n_vocab) with [att], [start]
        """
        pred_out = vocab_out[1:,:,:]  # (dec_len+1, batch, n_vocab) with [EOS]
        pred_idx = torch.argmax(pred_out, 2)  # (dec_len+1, batch)
        pred_idx = pred_idx.squeeze(1)  # (dec_len+1) because batch=1

        token_list = []
        dec_sen = ''
        for i in range(len(pred_idx)):
            token = num2token[pred_idx[i].cpu().numpy().item()]
            token_list.append(token)

            if 'Ġ' in token:  # 'Ġ' marks a word-initial GPT-2 BPE token
                token = token.strip('Ġ')
                dec_sen += ' '
                dec_sen += token
            else:
                dec_sen += token
        dec_sen = dec_sen.strip()

        return token_list, dec_sen

    def generated_sentence(self, enc_out, attribute, ori_length):
        """
        enc_out: (enc_len, batch, emb_dim)
        attribute: (batch, 2)
        ori_length: length of the original sentence
        """
        batch = enc_out.shape[1]
        max_len = ori_length+5

        # initialize with [att] and [start] because there is no first token yet
        att_emb = self.matrix_A(attribute).unsqueeze(0)  # (1, batch, emb_dim)
        start_token = self.emb_matrix(torch.tensor(self.START_IDX).cuda())  # (emb_dim)
        start_token = start_token.repeat(1, batch, 1)  # (1, batch, emb_dim)
        gen_input = torch.cat([att_emb, start_token], 0)  # (2, batch, emb_dim) w/ [att], [start]

        tgt_mask = self.generate_square_subsequent_mask(gen_input.shape[0]).cuda()  # (2, 2)

        dec_out = self.transformer_decoder(gen_input, enc_out, tgt_mask=tgt_mask)  # (2, batch, emb_dim)
        vocab_out = self.matrix_D(dec_out)  # (2, batch, n_vocab)
        _, dec_sen = self.dec2sen(vocab_out)

        for i in range(max_len):
            token_idx = torch.tensor(self.gpt_tokenizer.encode(dec_sen)).unsqueeze(0).cuda()  # (batch, gen_len)
            if self.EOS_IDX in token_idx:
                break

            dec_out, vocab_out = self.decoder(enc_out, token_idx, attribute)  # (dec_len+2, batch, emb_dim), (dec_len+2, batch, n_vocab)
            dec_tokens, dec_sen = self.dec2sen(vocab_out)

        return dec_sen
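End-to-end transfer chains the two models: delete attribute tokens with the classifier, encode what remains, and decode with the flipped attribute (see inference.ipynb for the notebook version). A rough sketch, assuming trained checkpoints at the paths below and that both model files are importable from the working directory:

import torch
import torch.nn.functional as F
from transformers import GPT2Tokenizer
from dis_model import findattribute
from gen_model import styletransfer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
dismodel = findattribute().cuda()
dismodel.load_state_dict(torch.load('classifier/models/cls_model_final'))  # assumed paths
dismodel.eval()
genmodel = styletransfer().cuda()
genmodel.load_state_dict(torch.load('models/gen_model_final'))
genmodel.eval()

sentence = "the service was slow and the food was cold"
token_idx = torch.tensor(tokenizer.encode(sentence)).unsqueeze(0).cuda()

# 1) delete attribute markers until the classifier is no longer confident
sentiment = dismodel.discriminator(token_idx).argmax(1).item()
del_idx = token_idx
for _ in range(token_idx.shape[1] // 2):
    del_idx = dismodel.att_prob(del_idx, sentiment)
    if F.softmax(dismodel.discriminator(del_idx), 1)[0, sentiment].item() < 0.7:
        break

# 2) encode the content tokens and decode with the flipped attribute
fake_attribute = torch.tensor([[0., 1.] if sentiment == 0 else [1., 0.]]).cuda()
enc_out = genmodel.encoder(del_idx)
print(genmodel.generated_sentence(enc_out, fake_attribute, token_idx.shape[1]))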
--------------------------------------------------------------------------------
/generation_model/yelp/train.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from tqdm import tqdm
import os
import sys
import random
import json

from transformers import *
gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

## initialization
from gen_model import *
genmodel = styletransfer().cuda()
genmodel.train()

sys.path.insert(0, "/DATA/joosung/controllable_english/yelp/classifier/")
from dis_model import *
dismodel = findattribute().cuda()
dismodel_name = 'cls_model_3'
dismodel.load_state_dict(torch.load('../classifier/models/{}'.format(dismodel_name)))
dismodel.eval()


import torch.optim as optim

from tensorboardX import SummaryWriter
summary = SummaryWriter(logdir='./logs')

def main():
    f = open('gpt_yelp_vocab.json')
    token2num = json.load(f)

    num2token = {}
    for key, value in token2num.items():
        num2token[value] = key
    f.close()

    data_path = "/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data"
    train_yelp_neg_path = data_path + "/yelp/sentiment.train.0"
    train_yelp_neg_open = open(train_yelp_neg_path, "r")
    train_yelp_neg_dataset = train_yelp_neg_open.readlines()
    yelp_neg_dataset = train_yelp_neg_dataset

    neg_len = len(yelp_neg_dataset)
    train_yelp_neg_open.close()

    train_yelp_pos_path = data_path + "/yelp/sentiment.train.1"
    train_yelp_pos_open = open(train_yelp_pos_path, "r")
    train_yelp_pos_dataset = train_yelp_pos_open.readlines()
    yelp_pos_dataset = train_yelp_pos_dataset

    pos_len = len(yelp_pos_dataset)
    train_yelp_pos_open.close()

    """training parameters"""
    aed_initial_lr = 0.00001
    gen_initial_lr = 0.001
    aed_trainer = optim.Adamax(genmodel.aed_params, lr=aed_initial_lr)  # reconstruction update
    gen_trainer = optim.Adamax(genmodel.aed_params, lr=gen_initial_lr)  # classifier-guided update
    max_grad_norm = 20
    batch = 1
    epoch = 6
    stop_point = pos_len*epoch

    for start in tqdm(range(0, stop_point)):
        """data start point"""
        neg_start = start%neg_len
        pos_start = start%pos_len

        """data setting"""
        neg_sentence = yelp_neg_dataset[neg_start].strip()
        pos_sentence = yelp_pos_dataset[pos_start].strip()

        neg_labels = []  # negative label, one-hot
        neg_labels.append([1,0])
        neg_attribute = torch.from_numpy(np.asarray(neg_labels)).type(torch.FloatTensor).cuda()

        pos_labels = []  # positive label, one-hot
        pos_labels.append([0,1])
        pos_attribute = torch.from_numpy(np.asarray(pos_labels)).type(torch.FloatTensor).cuda()

        sentences = [neg_sentence, pos_sentence]
        attributes = [neg_attribute, pos_attribute]
        sentiments = [0, 1]

        """data input"""
        for i in range(2):
            # i=0: negative, i=1: positive
            sentence = sentences[i]
            attribute = attributes[i]  # for the decoder
            fake_attribute = attributes[abs(1-i)]  # for generation

            token_idx = torch.tensor(gpt_tokenizer.encode(sentence)).unsqueeze(0).cuda()

            # delete step: use the classifier's own prediction (rather than the
            # gold label in `sentiments`) as the sentiment to erase
            max_len = int(token_idx.shape[1]/2)
            dis_out = dismodel.discriminator(token_idx)
            sentiment = dis_out.argmax(1).cpu().item()

            del_idx = token_idx
            for k in range(max_len):
                del_idx = dismodel.att_prob(del_idx, sentiment)
                dis_out = dismodel.discriminator(del_idx)
                sent_prob = F.softmax(dis_out, 1).squeeze(0)[sentiment].cpu().detach().numpy().item()
                if sent_prob < 0.7:
                    break

            """auto-encoder loss & training"""
            enc_out = genmodel.encoder(del_idx)
            dec_out, vocab_out = genmodel.decoder(enc_out, token_idx, attribute)

            ## loss calculation
            recon_loss = genmodel.recon_loss(token_idx, vocab_out)
            summary.add_scalar('reconstruction loss', recon_loss.item(), start)
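            # note: retain_graph=True in the backward call below keeps the
            # autograd graph alive, because vocab_out is reused for the
            # classifier-guided loss in the next update step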
            aed_trainer.zero_grad()
            recon_loss.backward(retain_graph=True)
            grad_norm = torch.nn.utils.clip_grad_norm_(genmodel.aed_params, max_grad_norm)
            aed_trainer.step()

            """decoder classification loss & training"""
            gen_cls_out = dismodel.gen_discriminator(vocab_out)

            ## loss calculation
            gen_cls_loss = genmodel.cls_loss(attribute, gen_cls_out)
            summary.add_scalar('generated sentence loss', gen_cls_loss.item(), start)

            gen_trainer.zero_grad()
            gen_cls_loss.backward()
            grad_norm = torch.nn.utils.clip_grad_norm_(genmodel.aed_params, max_grad_norm)
            gen_trainer.step()

        """saving point"""
        if (start+1)%pos_len == 0:
            random.shuffle(yelp_neg_dataset)
            random.shuffle(yelp_pos_dataset)
            save_model((start+1)//pos_len)
    save_model('final')  # final model


def save_model(iter):
    if not os.path.exists('models/'):
        os.makedirs('models/')
    torch.save(genmodel.state_dict(), 'models/gen_model_{}'.format(iter))


if __name__ == '__main__':
    torch.cuda.empty_cache()
    main()
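The scripts below fine-tune GPT-2 on each dataset with AdamW and WarmupLinearSchedule, which come from the older pytorch-transformers/transformers 2.0-era API. On current transformers releases the equivalent schedule is get_linear_schedule_with_warmup; a sketch of the swap (the step counts mirror the scripts below, and stop_point is a placeholder for epoch_len * epoch):

from transformers import GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup

model = GPT2LMHeadModel.from_pretrained('gpt2').cuda()
stop_point = 100000  # placeholder; the scripts compute this from the dataset size
optimizer = AdamW(model.parameters(), lr=1e-3, correct_bias=False)
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=stop_point // 10,
    num_training_steps=stop_point,
)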
--------------------------------------------------------------------------------
/gpt2/amazon/train.py:
--------------------------------------------------------------------------------
import torch
from tqdm import tqdm
import torch.optim as optim
import os
import random

from transformers import *
model_class, tokenizer_class = (GPT2LMHeadModel, GPT2Tokenizer)

tokenizer = tokenizer_class.from_pretrained('gpt2')
model = model_class.from_pretrained('gpt2').cuda()
model.train()
print('ok')

def main():
    data_path = "/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data"
    train_amazon_neg_path = data_path + "/amazon/sentiment.train.0"
    train_amazon_neg_open = open(train_amazon_neg_path, "r")
    train_amazon_neg_dataset = train_amazon_neg_open.readlines()
    amazon_neg_dataset = train_amazon_neg_dataset

    neg_len = len(amazon_neg_dataset)
    train_amazon_neg_open.close()

    train_amazon_pos_path = data_path + "/amazon/sentiment.train.1"
    train_amazon_pos_open = open(train_amazon_pos_path, "r")
    train_amazon_pos_dataset = train_amazon_pos_open.readlines()
    amazon_pos_dataset = train_amazon_pos_dataset

    pos_len = len(amazon_pos_dataset)
    train_amazon_pos_open.close()

    epoch = 5
    epoch_len = max(pos_len, neg_len)
    stop_point = epoch_len*epoch

    # parameters
    lr = 1e-3
    max_grad_norm = 1.0
    num_total_steps = stop_point
    num_warmup_steps = int(stop_point/10)

    lm_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)

    optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False)  # correct_bias=False reproduces BertAdam behavior
    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=num_warmup_steps, t_total=num_total_steps)

    torch.cuda.empty_cache()
    for start in tqdm(range(stop_point)):
        """data start point"""
        neg_start = start%neg_len
        pos_start = start%pos_len

        """data setting"""
        neg_sentence = amazon_neg_dataset[neg_start].strip()
        pos_sentence = amazon_pos_dataset[pos_start].strip()
        sentences = [neg_sentence, pos_sentence]
        """data input"""
        for i in range(2):
            # i=0: negative, i=1: positive
            sentence = sentences[i]

            sen_idx = torch.tensor(tokenizer.encode(sentence)).cuda()
            if len(sen_idx) == 1:
                continue
            output = model(sen_idx)

            # shift by one for language modeling: predict token t+1 from tokens <= t
            target = sen_idx[1:]
            pred = output[0][:-1,:]

            loss = lm_loss(pred, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)  # gradient clipping is no longer inside AdamW (safe to use with amp)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        if (start+1)%epoch_len == 0:
            random.shuffle(amazon_neg_dataset)
            random.shuffle(amazon_pos_dataset)
            save_model((start+1)//epoch_len)  # save once per epoch
    save_model('final')  # final model


def save_model(name):
    if not os.path.exists(str(name)+'/'):
        os.makedirs(str(name)+'/')
    model.save_pretrained('./'+str(name))
    tokenizer.save_pretrained('./'+str(name))

if __name__ == '__main__':
    torch.cuda.empty_cache()
    main()
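The saved models are presumably used to score the fluency (perplexity) of transferred sentences during evaluation. A minimal sketch of scoring with a saved checkpoint directory (the 'final' name follows save_model above), using the labels argument so the model returns its own shifted cross-entropy:

import math
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

model = GPT2LMHeadModel.from_pretrained('./final').cuda().eval()
tokenizer = GPT2Tokenizer.from_pretrained('./final')

def perplexity(sentence):
    ids = torch.tensor(tokenizer.encode(sentence)).unsqueeze(0).cuda()
    with torch.no_grad():
        loss = model(ids, labels=ids)[0]  # mean cross-entropy over shifted tokens
    return math.exp(loss.item())

print(perplexity("the food was great"))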
--------------------------------------------------------------------------------
/gpt2/yelp/train.py:
--------------------------------------------------------------------------------
import torch
from tqdm import tqdm
import torch.optim as optim
import os
import random

from transformers import *
model_class, tokenizer_class = (GPT2LMHeadModel, GPT2Tokenizer)

tokenizer = tokenizer_class.from_pretrained('gpt2')
model = model_class.from_pretrained('gpt2').cuda()
model.train()
print('ok')

def main():
    data_path = "/DATA/joosung/sentiment_data/Sentiment-and-Style-Transfer-master/data"
    yelp_neg_path = data_path + "/yelp/sentiment.train.0"
    yelp_neg_open = open(yelp_neg_path, "r")
    yelp_neg_dataset = yelp_neg_open.readlines()
    neg_len = len(yelp_neg_dataset)
    yelp_neg_open.close()

    yelp_pos_path = data_path + "/yelp/sentiment.train.1"
    yelp_pos_open = open(yelp_pos_path, "r")
    yelp_pos_dataset = yelp_pos_open.readlines()
    pos_len = len(yelp_pos_dataset)
    yelp_pos_open.close()

    epoch = 5
    stop_point = pos_len*epoch

    # parameters
    lr = 1e-3
    max_grad_norm = 1.0
    num_total_steps = stop_point
    num_warmup_steps = int(stop_point/10)

    lm_loss = torch.nn.CrossEntropyLoss(ignore_index=-1)

    optimizer = AdamW(model.parameters(), lr=lr, correct_bias=False)  # correct_bias=False reproduces BertAdam behavior
    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=num_warmup_steps, t_total=num_total_steps)

    torch.cuda.empty_cache()
    for start in tqdm(range(stop_point)):
        """data start point"""
        neg_start = start%neg_len
        pos_start = start%pos_len

        """data setting"""
        neg_sentence = yelp_neg_dataset[neg_start].strip()
        pos_sentence = yelp_pos_dataset[pos_start].strip()

        sentences = [neg_sentence, pos_sentence]
        """data input"""
        for i in range(2):
            # i=0: negative, i=1: positive
            sentence = sentences[i]

            sen_idx = torch.tensor(tokenizer.encode(sentence)).cuda()
            if len(sen_idx) == 1:
                continue
            output = model(sen_idx)

            # shift by one for language modeling: predict token t+1 from tokens <= t
            target = sen_idx[1:]
            pred = output[0][:-1,:]

            loss = lm_loss(pred, target)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)  # gradient clipping is no longer inside AdamW (safe to use with amp)
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()
77 | # print(loss) 78 | if (start+1)%pos_len == 0: 79 | random.shuffle(yelp_neg_dataset) 80 | random.shuffle(yelp_pos_dataset) 81 | save_model((start+1)//pos_len) 82 | save_model('final') # final_model 83 | 84 | 85 | def save_model(name): 86 | if not os.path.exists(str(name)+'/'): 87 | os.makedirs(str(name)+'/') 88 | model.save_pretrained('./'+str(name)) 89 | tokenizer.save_pretrained('./'+str(name)) 90 | 91 | if __name__ == '__main__': 92 | torch.cuda.empty_cache() 93 | main() 94 | -------------------------------------------------------------------------------- /image/our_model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rungjoo/Stable-Style-Transformer/c0c6749f3209d9179b6f71b3cac0d665bb00353a/image/our_model.png --------------------------------------------------------------------------------