# Hyperparameters for the trainer (e.g. Hugging Face `TrainingArguments`).
# NOTE(review): the original line contained TWO merged hyperparameter sets with
# duplicated keyword arguments (num_train_epochs, batch sizes, warmup_steps,
# eval_steps, gradient_accumulation_steps, learning_rate) — a SyntaxError inside
# a call. Deduplicated here, keeping the second (later) set's values; settings
# that appeared only in the first set (weight_decay, save_steps) are preserved.
num_train_epochs=3,
per_device_train_batch_size=32,
per_device_eval_batch_size=32,
warmup_steps=50,
weight_decay=0.01,
eval_steps=100,
save_steps=100,
gradient_accumulation_steps=2,
learning_rate=3e-5,
# Reduce LR when the monitored metric plateaus; metric is minimized
# (greater_is_better=False), e.g. eval loss.
lr_scheduler_type="reduce_lr_on_plateau",
greater_is_better=False,