├── README.md
├── requirements.txt
└── simpleSentimentAnalysis.py


/README.md:
--------------------------------------------------------------------------------
1 | # little_language_model


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eust-w/little_language_model/a6fd3ec6d30ae08dab530d520eda1c36fb34817c/requirements.txt


--------------------------------------------------------------------------------
/simpleSentimentAnalysis.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from transformers import Trainer, TrainingArguments
 4 | from transformers import AutoTokenizer, AutoModelForSequenceClassification
 5 | import torch
 6 | 
 7 | tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased')
 8 | model = AutoModelForSequenceClassification.from_pretrained('bert-base-uncased')
 9 | 
10 | train_texts = [
11 |     "I love machine learning.",
12 |     "Transformers are amazing!",
13 |     "I don't like the new update.",
14 |     "Natural language processing is fascinating."
15 | ]
16 | 
17 | train_labels = [1, 1, 0, 1]  # 假设1代表正面情感，0代表负面情感
18 | 
19 | 
20 | class CustomDataset(torch.utils.data.Dataset):
21 |     def __init__(self, encodings, labels):
22 |         self.encodings = encodings
23 |         self.labels = labels
24 | 
25 |     def __getitem__(self, idx):
26 |         item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
27 |         item['labels'] = torch.tensor(self.labels[idx])
28 |         return item
29 | 
30 |     def __len__(self):
31 |         return len(self.labels)
32 | 
33 | 
34 | train_encodings = tokenizer(train_texts, truncation=True, padding=True)
35 | train_dataset = CustomDataset(train_encodings, train_labels)
36 | 
37 | current_file_path = os.path.abspath(__file__)
38 | 
39 | current_file_name = os.path.basename(current_file_path)
40 | 
41 | training_args = TrainingArguments(
42 |     output_dir='./results'+current_file_name,
43 |     num_train_epochs=3,
44 |     per_device_train_batch_size=8,
45 |     per_device_eval_batch_size=8,
46 |     warmup_steps=500,
47 |     weight_decay=0.01,
48 |     logging_dir='./logs'+current_file_name,
49 | )
50 | 
51 | trainer = Trainer(
52 |     model=model,
53 |     args=training_args,
54 |     train_dataset=train_dataset,
55 | )
56 | 
57 | trainer.train()
58 | 


--------------------------------------------------------------------------------