├── README.md ├── requirements.txt └── simpleSentimentAnalysis.py /README.md: -------------------------------------------------------------------------------- 1 | # little_language_model -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eust-w/little_language_model/a6fd3ec6d30ae08dab530d520eda1c36fb34817c/requirements.txt -------------------------------------------------------------------------------- /simpleSentimentAnalysis.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from transformers import Trainer, TrainingArguments 4 | from transformers import AutoTokenizer, AutoModelForSequenceClassification 5 | import torch 6 | 7 | tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') 8 | model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased') 9 | 10 | train_texts = [ 11 | "I love machine learning.", 12 | "Transformers are amazing!", 13 | "I don't like the new update.", 14 | "Natural language processing is fascinating." 15 | ] 16 | 17 | train_labels = [1, 1, 0, 1] # 假设1代表正面情感,0代表负面情感 18 | 19 | 20 | class CustomDataset(torch.utils.data.Dataset): 21 | def __init__(self, encodings, labels): 22 | self.encodings = encodings 23 | self.labels = labels 24 | 25 | def __getitem__(self, idx): 26 | item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()} 27 | item['labels'] = torch.tensor(self.labels[idx]) 28 | return item 29 | 30 | def __len__(self): 31 | return len(self.labels) 32 | 33 | 34 | train_encodings = tokenizer(train_texts, truncation=True, padding=True) 35 | train_dataset = CustomDataset(train_encodings, train_labels) 36 | 37 | current_file_path = os.path.abspath(__file__) 38 | 39 | current_file_name = os.path.basename(current_file_path) 40 | 41 | training_args = TrainingArguments( 42 | output_dir='./results'+current_file_name, 43 | num_train_epochs=3, 44 | per_device_train_batch_size=8, 45 | per_device_eval_batch_size=8, 46 | warmup_steps=500, 47 | weight_decay=0.01, 48 | logging_dir='./logs'+current_file_name, 49 | ) 50 | 51 | trainer = Trainer( 52 | model=model, 53 | args=training_args, 54 | train_dataset=train_dataset, 55 | ) 56 | 57 | trainer.train() 58 | --------------------------------------------------------------------------------