帮助文档
专业提供香港服务器、香港云服务器、香港高防服务器租用、香港云主机、台湾服务器、美国服务器、美国云服务器vps租用、韩国高防服务器租用、新加坡服务器、日本服务器租用 一站式全球网络解决方案提供商!专业运营维护IDC数据中心,提供高质量的服务器托管,服务器机房租用,服务器机柜租用,IDC机房机柜租用等服务,稳定、安全、高性能的云端计算服务,实时满足您的多样性业务需求。 香港大带宽稳定可靠,高级工程师提供基于服务器硬件、操作系统、网络、应用环境、安全的免费技术支持。
服务器资讯 / 香港服务器租用 / 香港VPS租用 / 香港云服务器 / 美国服务器租用 / 台湾服务器租用 / 日本服务器租用 / 官方公告 / 帮助文档
李宏毅2023机器学习作业1--homework1
发布时间:2024-03-01 08:32:43   分类:帮助文档
# Hung-yi Lee Machine Learning 2023, Homework 1 (homework1):
# COVID-19 positive-rate regression with a small fully connected network.
#
# NOTE: this listing comes from a notebook. Shell commands and notebook magics
# are kept below as comments so the listing is valid Python.

# ---------------------------------------------------------------------------
# 1. Preparation
# ---------------------------------------------------------------------------

# Download the training data and the test data (notebook shell commands; run
# them in a notebook cell, or without the leading "!" in a terminal).
# dropbox link
# !wget -O covid_train.csv https://www.dropbox.com/s/lmy1riadzoy0ahw/covid.train.csv?dl=0
# !wget -O covid_test.csv https://www.dropbox.com/s/zalbw42lu4nmhr2/covid.test.csv?dl=0

# Import packages
# Numerical Operations
import math
import numpy as np  # array manipulation: insert, delete, look up, modify

# Reading/Writing Data
import pandas as pd  # reads the csv files
import os  # directory handling
import csv

# For Progress Bar
from tqdm import tqdm  # progress visualisation

# Pytorch
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split

# For plotting learning curve
from torch.utils.tensorboard import SummaryWriter


# Utility functions

def same_seed(seed):
    """Fix the random number generator seeds for reproducibility.

    Args:
        seed: Seed passed to NumPy and PyTorch (and to every CUDA device when
            CUDA is available).
    """
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)


def train_valid_split(data_set, valid_ratio, seed):
    """Split the provided training data into a training and a validation set.

    Args:
        data_set: Indexable collection of rows (the script passes the array
            read from the training csv).
        valid_ratio: Fraction of ``data_set`` that goes to the validation set.
        seed: Seed of the generator that drives the random split.

    Returns:
        Tuple ``(train_set, valid_set)`` of NumPy arrays.
    """
    valid_set_size = int(valid_ratio * len(data_set))
    train_set_size = len(data_set) - valid_set_size
    train_set, valid_set = random_split(
        data_set,
        [train_set_size, valid_set_size],
        generator=torch.Generator().manual_seed(seed),
    )
    return np.array(train_set), np.array(valid_set)


# Configuration
device = 'cuda' if torch.cuda.is_available() else 'cpu'
config = {
    'seed': 5201314,      # Your seed number, you can pick your lucky number. :)
    'select_all': False,  # Whether to use all features.
    'valid_ratio': 0.2,   # validation_size = int(valid_ratio * number of provided training rows)
    'n_epochs': 5000,     # Number of epochs.
    'batch_size': 256,
    'learning_rate': 1e-5,
    'early_stop': 600,    # If model has not improved for this many consecutive epochs, stop training.
    'save_path': './models/model.ckpt'  # Your model will be saved here.
}


# ---------------------------------------------------------------------------
# 2. Building the data
# ---------------------------------------------------------------------------

# Create the Dataset

class COVID19Dataset(Dataset):
    """Dataset of feature rows with optional targets.

    x: Features.
    y: Targets, if none, do prediction.
    """

    def __init__(self, x, y=None):
        # Without targets the dataset yields features only (prediction mode).
        self.y = None if y is None else torch.FloatTensor(y)
        self.x = torch.FloatTensor(x)

    def __getitem__(self, idx):
        if self.y is None:
            return self.x[idx]
        return self.x[idx], self.y[idx]

    def __len__(self):
        return len(self.x)


# Feature selection
# The belief and mental features are removed. Both describe psychological
# state; the author felt they are probably only loosely related to the
# positive rate, so these two groups of features are dropped.

def select_feat(train_data, valid_data, test_data, select_all=True):
    """Select useful features to perform regression.

    Args:
        train_data: 2-D array whose last column is the target.
        valid_data: 2-D array whose last column is the target.
        test_data: 2-D array of features only (no target column).
        select_all: Keep every feature column when True; otherwise drop the
            columns listed in ``del_col``.

    Returns:
        Tuple ``(x_train, x_valid, x_test, y_train, y_valid)``.
    """
    # [:, -1] takes every row and only the last column, i.e. the label.
    y_train, y_valid = train_data[:, -1], valid_data[:, -1]
    # [:, :-1] takes every row and every column except the last one.
    raw_x_train, raw_x_valid, raw_x_test = train_data[:, :-1], valid_data[:, :-1], test_data

    n_features = raw_x_train.shape[1]
    if select_all:
        feat_idx = list(range(n_features))
    else:
        # Per the article these are the columns holding the belief and mental
        # features (index 0 is presumably the id column -- verify against the
        # csv header). The course template's alternative was to keep only the
        # columns from index 35 onward.
        del_col = {0, 38, 39, 46, 51, 56, 57, 64, 69, 74, 75, 82, 87}
        # Keeping the complement of del_col selects the same columns that
        # np.delete(..., del_col, axis=1) would leave, with one return path.
        feat_idx = [col for col in range(n_features) if col not in del_col]

    return raw_x_train[:, feat_idx], raw_x_valid[:, feat_idx], raw_x_test[:, feat_idx], y_train, y_valid


# Create the DataLoaders
# Read the files and set up the training, validation and test datasets.

# Set seed for reproducibility
same_seed(config['seed'])

# train_data size: 3009 x 89 (35 states + 18 features x 3 days)
# test_data size: 997 x 88 (without last day's positive rate)
# Read the csv data with pandas.
train_data, test_data = pd.read_csv('./covid_train.csv').values, pd.read_csv('./covid_test.csv').values

# Split into training and validation sets.
train_data, valid_data = train_valid_split(train_data, config['valid_ratio'], config['seed'])

# Print out the data size.
print(f"""train_data size: {train_data.shape}
valid_data size: {valid_data.shape}
test_data size: {test_data.shape}""")

# Select features
x_train, x_valid, x_test, y_train, y_valid = select_feat(train_data, valid_data, test_data, config['select_all'])

# Print out the number of features.
print(f'number of features: {x_train.shape[1]}')

# Build the datasets.
train_dataset, valid_dataset, test_dataset = COVID19Dataset(x_train, y_train), \
                                             COVID19Dataset(x_valid, y_valid), \
                                             COVID19Dataset(x_test)

# Pytorch data loader loads pytorch dataset into batches.
train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True, pin_memory=True)
# Validation is not shuffled: the validation loss is a mean of per-batch means
# and the last batch is smaller, so shuffling would make the metric vary
# between epochs for identical weights.
valid_loader = DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=False, pin_memory=True)


# ---------------------------------------------------------------------------
# 3. Building the neural network model
# ---------------------------------------------------------------------------

class My_Model(nn.Module):
    """Fully connected regressor: input_dim -> 16 -> 8 -> 1 with ReLU."""

    def __init__(self, input_dim):
        super(My_Model, self).__init__()
        # TODO: modify model's structure, be aware of dimensions.
        self.layers = nn.Sequential(
            nn.Linear(input_dim, 16),
            nn.ReLU(),
            nn.Linear(16, 8),
            nn.ReLU(),
            nn.Linear(8, 1)
        )

    def forward(self, x):
        x = self.layers(x)
        x = x.squeeze(1)  # (B, 1) -> (B)
        return x


# ---------------------------------------------------------------------------
# 4. Model training and model testing
# ---------------------------------------------------------------------------

# Model training

def trainer(train_loader, valid_loader, model, config, device):
    """Train ``model`` and save the weights with the lowest validation loss.

    Args:
        train_loader: Loader yielding ``(x, y)`` training batches.
        valid_loader: Loader yielding ``(x, y)`` validation batches.
        model: Network to optimise; it is updated in place.
        config: Dict providing 'learning_rate', 'n_epochs', 'early_stop' and
            'save_path'.
        device: Device the batches are moved to before the forward pass.

    Returns:
        None. Training stops after 'n_epochs' epochs, or earlier once the
        validation loss has not improved for 'early_stop' consecutive epochs.
    """
    criterion = nn.MSELoss(reduction='mean')  # Define your loss function, do not modify this.

    # Define your optimization algorithm.
    # TODO: Please check https://pytorch.org/docs/stable/optim.html to get more available algorithms.
    # TODO: L2 regularization (optimizer(weight decay...) or implement by your self).
    optimizer = torch.optim.SGD(model.parameters(), lr=config['learning_rate'], momentum=0.9)
    writer = SummaryWriter()  # Writer of tensorboard.

    # Create the directory the checkpoint is written to. It is derived from
    # save_path so that changing the configured path keeps working.
    save_dir = os.path.dirname(config['save_path'])
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)

    # math.inf is positive infinity, so the first epoch always improves on it.
    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

    try:
        for epoch in range(n_epochs):
            model.train()  # Set your model to train mode.
            loss_record = []  # per-step training losses of this epoch

            # tqdm is a package to visualize your training progress.
            train_pbar = tqdm(train_loader, position=0, leave=True)

            for x, y in train_pbar:
                optimizer.zero_grad()              # Set gradient to zero.
                x, y = x.to(device), y.to(device)  # Move your data to device.
                pred = model(x)                    # forward pass
                loss = criterion(pred, y)          # loss between prediction and target
                loss.backward()                    # Compute gradient(backpropagation).
                optimizer.step()                   # Update parameters.
                step += 1
                loss_value = loss.item()
                loss_record.append(loss_value)

                # Display current epoch number and loss on tqdm progress bar.
                train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
                train_pbar.set_postfix({'loss': loss_value})

            mean_train_loss = sum(loss_record) / len(loss_record)  # mean training loss
            writer.add_scalar('Loss/train', mean_train_loss, step)

            model.eval()  # Set your model to evaluation mode.
            loss_record = []
            for x, y in valid_loader:
                x, y = x.to(device), y.to(device)
                with torch.no_grad():
                    pred = model(x)
                    loss = criterion(pred, y)

                loss_record.append(loss.item())

            mean_valid_loss = sum(loss_record) / len(loss_record)  # mean validation loss
            print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
            writer.add_scalar('Loss/valid', mean_valid_loss, step)

            # Keep the model with the lowest mean validation loss.
            if mean_valid_loss < best_loss:
                best_loss = mean_valid_loss
                torch.save(model.state_dict(), config['save_path'])  # Save your best model
                print('Saving model with loss {:.3f}...'.format(best_loss))
                early_stop_count = 0
            else:
                early_stop_count += 1

            # Early stopping: halt once the validation loss has not improved
            # for config['early_stop'] consecutive epochs.
            if early_stop_count >= config['early_stop']:
                print('\nModel is not improving, so we halt the training session.')
                return
    finally:
        # Close the writer on every exit path, including early stop.
        writer.close()


# Model testing

def predict(test_loader, model, device):
    """Run ``model`` over the test data and return the predictions.

    Args:
        test_loader: Loader yielding feature batches ``x``.
        model: Trained network.
        device: Device the batches are moved to.

    Returns:
        NumPy array with the per-batch predictions concatenated along dim 0.
    """
    model.eval()  # Set your model to evaluation mode.
    preds = []
    for x in tqdm(test_loader):
        x = x.to(device)
        with torch.no_grad():  # no gradients are needed for inference
            pred = model(x)
            preds.append(pred.detach().cpu())
    preds = torch.cat(preds, dim=0).numpy()
    return preds


# ---------------------------------------------------------------------------
# 5. Train the model
# ---------------------------------------------------------------------------

model = My_Model(input_dim=x_train.shape[1]).to(device)  # put your model and data on the same computation device.
trainer(train_loader, valid_loader, model, config, device)


# ---------------------------------------------------------------------------
# 6. Test the model and generate the predictions
# ---------------------------------------------------------------------------

def save_pred(preds, file):
    """Save predictions to the specified csv file.

    Args:
        preds: Iterable of predicted values; the row id is the position.
        file: Path of the csv file to write.
    """
    # newline='' lets the csv module control line endings itself; without it
    # an extra blank row appears after every record on Windows.
    with open(file, 'w', newline='') as fp:
        writer = csv.writer(fp)
        writer.writerow(['id', 'tested_positive'])
        for i, p in enumerate(preds):
            writer.writerow([i, p])


model = My_Model(input_dim=x_train.shape[1]).to(device)
# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(config['save_path'], map_location=device))  # load the best model
preds = predict(test_loader, model, device)  # generate the predictions
save_pred(preds, 'pred.csv')                 # save them to pred.csv

# Visualise the training and validation loss curves with tensorboard
# (notebook magics; run them in a notebook cell).
# %reload_ext tensorboard
# %tensorboard --logdir=./runs/

# References (original titles):
# - 李宏毅_机器学习_作业1(详解)_COVID-19 Cases Prediction (Regression)-物联沃-IOTWORD物联网
#   (Hung-yi Lee machine learning homework 1, detailed walkthrough)
# - 【深度学习】2023李宏毅homework1作业一代码详解_李宏毅作业1-博客
#   (2023 Hung-yi Lee homework 1 code walkthrough)
# - np.delete详解-博客 (np.delete explained)
香港云服务器租用推荐
服务器租用资讯
·广东云服务有限公司怎么样
·广东云服务器怎么样
·广东锐讯网络有限公司怎么样
·广东佛山的蜗牛怎么那么大
·广东单位电话主机号怎么填写
·管家婆 花生壳怎么用
·官网域名过期要怎么办
·官网邮箱一般怎么命名
·官网网站被篡改怎么办
服务器租用推荐
·美国服务器租用
·台湾服务器租用
·香港云服务器租用
·香港裸金属服务器
·香港高防服务器租用
·香港服务器租用特价