AutoMLはオープンソースでも登場しており、細かいチューニングが不要な用途では、notebookの体裁すら要らないノーコードの時代になってきています。本来は、GUIでボタンを押すだけで分析が完了できてしかるべきです。ちなみに、高価なDataRobotや、無料でもそこそこ使えるAutoAI with IBM Watson Studioでは、既にGUIでAutoMLが可能です。
(base) masaru@MacBook-Pro-15 output-3epochs % tar -xvf output-3epochs_p_aa.tar.gz
x output-3epochs/
x output-3epochs/checkpoint-5000/
x output-3epochs/checkpoint-5000/config.json
x output-3epochs/checkpoint-5000/pytorch_model.bin: truncated gzip input
tar: Error exit delayed from previous errors.
(base) masaru@MacBook-Pro-15 output-3epochs % cat output-3epochs_p_* > output-3epochs.tar.gz
(base) masaru@MacBook-Pro-15 output-3epochs % ls -al
total 33769200
drwxr-xr-x 6 masaru staff 192 7 29 18:15 .
drwxr-xr-x 4 masaru staff 128 7 12 19:03 ..
-rw-r--r-- 1 masaru staff 8631501037 7 29 18:16 output-3epochs.tar.gz
-rw-r--r-- 1 masaru staff 3145728000 7 12 19:35 output-3epochs_p_aa.tar.gz
-rw-r--r-- 1 masaru staff 3145728000 7 21 19:47 output-3epochs_p_ab.tar.gz
-rw-r--r-- 1 masaru staff 2340045037 7 29 18:08 output-3epochs_p_ac.tar.gz
(base) masaru@MacBook-Pro-15 output-3epochs % tar -xvf output-3epochs.tar.gz
x output-3epochs/
x output-3epochs/checkpoint-5000/
x output-3epochs/checkpoint-5000/config.json
x output-3epochs/checkpoint-5000/pytorch_model.bin
x output-3epochs/checkpoint-5000/tokenizer_config.json
x output-3epochs/checkpoint-5000/special_tokens_map.json
x output-3epochs/checkpoint-5000/spiece.model
x output-3epochs/checkpoint-5000/training_args.bin
x output-3epochs/checkpoint-5000/optimizer.pt
x output-3epochs/checkpoint-5000/scheduler.pt
x output-3epochs/checkpoint-5000/trainer_state.json
x output-3epochs/checkpoint-10000/
x output-3epochs/checkpoint-10000/config.json
x output-3epochs/checkpoint-10000/pytorch_model.bin
x output-3epochs/checkpoint-10000/tokenizer_config.json
x output-3epochs/checkpoint-10000/special_tokens_map.json
x output-3epochs/checkpoint-10000/spiece.model
x output-3epochs/checkpoint-10000/training_args.bin
x output-3epochs/checkpoint-10000/optimizer.pt
x output-3epochs/checkpoint-10000/scheduler.pt
x output-3epochs/checkpoint-10000/trainer_state.json
x output-3epochs/config.json
x output-3epochs/pytorch_model.bin
x output-3epochs/tokenizer_config.json
x output-3epochs/special_tokens_map.json
x output-3epochs/spiece.model
x output-3epochs/training_args.bin
x output-3epochs/train_results.json
x output-3epochs/trainer_state.json
x output-3epochs/eval_results.json
x output-3epochs/all_results.json
# Notebook setup cell: imports, a listing of the input data directory, and
# global display configuration for seaborn / pandas.

# Standard library
import gc  # garbage collector
import json
import os
import re
from ast import literal_eval

# Third-party
import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV  # Experimented hyperparams a bit with this
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBRegressor

# Print the full path of every file found under the dataset directory.
for dirname, _, filenames in os.walk('/home/masaru/data/kaggle_google_analytics'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Make sure automatic garbage collection is switched on.
gc.enable()

# Global seaborn theme and default figure size for every plot that follows.
sns.set(
    style='whitegrid',
    palette='deep',
    font_scale=1.1,
    rc={'figure.figsize': [8, 6]},
)

# to display full numbers in dataframe and not just exponentiated form
pd.set_option('float_format', '{:f}'.format)
# -*- coding: utf-8 -*-
import psycopg2
import pandas as pd
import plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
# Initialise plotly's offline notebook mode so that iplot() output can be
# shown inline. connected=False is the library default, spelled out here.
init_notebook_mode(connected=False)