Lib GPyOpt

Homepage, GitHub, Manual (Jupyter), ReadTheDocs

# Package installation
sudo apt-get install python-pip
pip install gpyopt --user

# Source installation
git clone https://github.com/SheffieldML/GPyOpt.git
cd GPyOpt
python setup.py develop

Note: as of this writing, GPy only supports Python 2.7, not Python 3.

How to Run

Python console

# --- Load GPyOpt
from GPyOpt.methods import BayesianOptimization
import numpy as np

# --- Define your problem
def f(x): return (6*x-2)**2*np.sin(12*x-4)
domain = [{'name': 'var_1', 'type': 'continuous', 'domain': (0,1)}]

# --- Solve your problem
myBopt = BayesianOptimization(f=f, domain=domain)
myBopt.run_optimization(max_iter=15)
myBopt.plot_acquisition()
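
After the run, the best location found and its value are available as myBopt.x_opt and myBopt.fx_opt (both are used in example A below).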

Configuration file

# myfunc.py
def myfunc(x,y):
    return (4-2.1*x**2 + x**4/3)*x**2 + x*y + (-4 +4*y**2)*y**2
# config.json
{
    "language"        : "PYTHON",
    "main-file"       : "myfunc.py",
    "experiment-name" : "simple-example",
    "likelihood"      : "gaussian",
    "resources": {
        "maximum-iterations" :  1,
        "max-run-time": "NA"
    },
    "variables" : {
        "y" : {
            "type" : "FLOAT",
            "size" : 1,
            "min"  : -3,
            "max"  : 3
        },
        "x" : {
            "type" : "FLOAT",
            "size" : 1,
            "min"  : -2,
            "max"  : 2
        }
    },
    "output":{
        "verbosity": true
    }
}
$ gpyopt.py ../myproblem/config.json
# `Evaluations.txt`: contains the locations and values of the function evaluations.
# `Models.txt`: contains the parameters of all the models used.

Examples

A. 1D example w/ a built-in objective

%pylab inline  
import GPy
import GPyOpt

# Create the true Forrester function and the boundaries of the problem
f_true = GPyOpt.objective_examples.experiments1d.forrester()         # true (noiseless) function
bounds = [{'name': 'var_1', 'type': 'continuous', 'domain': (0,1)}]  # problem constraints

# Visualization
#f_true.plot()

# Alternative: pandas-based visualization (assumes `import pandas as pd` and
# a generic objective `obj_func`; adjust the x-range to your problem)
#x = pd.Series(np.linspace(-5, 4, 1000))
#f_x = x.apply(obj_func)
#plt.plot(x, f_x, 'b-')
#plt.show()
# Create the GPyOpt object with the model and acquisition function
seed(123)
myBopt = GPyOpt.methods.BayesianOptimization(f=f_true.f,             # function to optimize
                                             domain=bounds,          # box-constraints of the problem
                                             acquisition_type='EI',  # selects Expected Improvement
                                             exact_feval=True)       # noiseless evaluations

# Run the optimization
max_iter = 15     # evaluation budget
max_time = 60     # time budget 
eps      = 10e-6  # minimum allowed distance between the last two observations

myBopt.run_optimization(max_iter, max_time, eps)   

# Best location x* found by the optimizer
myBopt.x_opt

# Predicted value of f at the optimum x*
myBopt.fx_opt

# Plot results (one-dimensional problems only)
myBopt.plot_acquisition()

# Plot results (problems of any dimension); shows:
## the distance between the last two observations, and
## the value of f at the best location prior to each iteration.
myBopt.plot_convergence()

B. 2D example w/ a built-in objective

%pylab inline  
import GPy
import GPyOpt

# Create the objective function (true and noisy versions)
f_true = GPyOpt.objective_examples.experiments2d.sixhumpcamel()
f_sim = GPyOpt.objective_examples.experiments2d.sixhumpcamel(sd = 0.1)
bounds =[{'name': 'var_1', 'type': 'continuous', 'domain': f_true.bounds[0]},
         {'name': 'var_2', 'type': 'continuous', 'domain': f_true.bounds[1]}
         #{'name': 'var3', 'type': 'discrete', 'domain': (3,8,10),'dimensionality': 2}, # discrete Datatype
         #{'name': 'var4', 'type': 'categorical', 'domain': (0,1,2),'dimensionality': 1}, #categorical Datatype
        ]

f_true.plot()

# Create the BO object (the commented options below enable batch/parallel BO)

myBopt2D = GPyOpt.methods.BayesianOptimization(f_sim.f,
                                              domain=bounds,
                                              model_type = 'GP',
                                              acquisition_type='EI',  
                                              initial_design_numdata = 10,
                                              ##parallel Bayesian optimization
                                              #evaluator_type = 'local_penalization',
                                              #batch_size = 4,
                                              #num_cores = 4,
                                              normalize_Y = True,
                                              acquisition_weight = 2)  

# Run the optimization
max_iter = 40  # maximum number of iterations
max_time = 60  # maximum time in seconds

myBopt2D.run_optimization(max_iter,max_time,verbosity=False)

myBopt2D.plot_acquisition()
myBopt2D.plot_convergence()
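
The commented-out options above (evaluator_type, batch_size, num_cores) enable batch, i.e. parallel, Bayesian optimization via local penalization. A minimal sketch reusing f_sim and bounds from this example; the batch_size and num_cores values are illustrative:

# Batch BO sketch: local penalization proposes batch_size points per iteration
myBopt2D_batch = GPyOpt.methods.BayesianOptimization(f_sim.f,
                                                     domain=bounds,
                                                     acquisition_type='EI',
                                                     evaluator_type='local_penalization',
                                                     batch_size=4,  # points proposed per iteration (illustrative)
                                                     num_cores=4)   # parallel evaluations (illustrative)
myBopt2D_batch.run_optimization(max_iter=10)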

C. 2D example w/ your own objective

Modular Bayesian Optimization
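
The link above covers the modular API. Below is a minimal sketch, assembling the six-hump camel problem of example B from explicitly chosen parts (objective, space, model, acquisition, evaluator); it follows the structure of the modular tutorial, so treat it as a template rather than a definitive recipe:

import GPyOpt

# Objective and design space (six-hump camel, as in example B)
func = GPyOpt.objective_examples.experiments2d.sixhumpcamel()
objective = GPyOpt.core.task.SingleObjective(func.f)
space = GPyOpt.Design_space(space=[{'name': 'var_1', 'type': 'continuous', 'domain': func.bounds[0]},
                                   {'name': 'var_2', 'type': 'continuous', 'domain': func.bounds[1]}])

# Surrogate model, acquisition function, and evaluator chosen by hand
model = GPyOpt.models.GPModel(optimize_restarts=5, verbose=False)
acquisition_optimizer = GPyOpt.optimization.AcquisitionOptimizer(space)
acquisition = GPyOpt.acquisitions.AcquisitionEI(model, space, optimizer=acquisition_optimizer)
evaluator = GPyOpt.core.evaluators.Sequential(acquisition)
initial_design = GPyOpt.experiment_design.initial_design('random', space, 5)

# Assemble the pieces and run
bo = GPyOpt.methods.ModularBayesianOptimization(model, space, objective, acquisition, evaluator, initial_design)
bo.run_optimization(max_iter=10)
bo.plot_acquisition()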

D. Scikit-learn

%pylab inline  
import GPy
import GPyOpt
import numpy as np
from sklearn import svm
from numpy.random import seed
seed(12345)

# Let's load the dataset
## https://raw.githubusercontent.com/NYXFLOWER/Machine-Learning/master/warm/olympicMarathonTimes.csv
GPy.util.datasets.authorize_download = lambda x: True # prevents requesting authorization for download.
data = GPy.util.datasets.olympic_marathon_men()
X = data['X']
Y = data['Y']
X_train = X[:20]
Y_train = Y[:20,0]
X_test = X[20:]
Y_test = Y[20:,0]


nfold = 3

def fit_svr_val(x):
    # x holds log-scale hyperparameters (C, epsilon, gamma); exponentiate first
    x = np.atleast_2d(np.exp(x))
    fs = np.zeros((x.shape[0],1))
    for i in range(x.shape[0]):
        fs[i] = 0
        for n in range(nfold):
            # use the n-th contiguous fold for validation, the rest for training
            idx = np.array(range(X_train.shape[0]))
            idx_valid = np.logical_and(idx>=X_train.shape[0]/nfold*n, idx<X_train.shape[0]/nfold*(n+1))
            idx_train = np.logical_not(idx_valid)
            svr = svm.SVR(C=x[i,0], epsilon=x[i,1], gamma=x[i,2])
            svr.fit(X_train[idx_train], Y_train[idx_train])
            # accumulate the validation RMSE of this fold
            fs[i] += np.sqrt(np.square(svr.predict(X_train[idx_valid])-Y_train[idx_valid]).mean())
        fs[i] *= 1./nfold  # average over folds
    return fs

## -- Note that similar wrapper functions can be used to tune other Scikit-learn methods (see the sketch at the end of this example)


# Search space for the hyperparameters (log scale; fit_svr_val exponentiates)
domain       =[{'name': 'C',      'type': 'continuous', 'domain': (0.,7.)},
               {'name': 'epsilon','type': 'continuous', 'domain': (-12.,-2.)},
               {'name': 'gamma',  'type': 'continuous', 'domain': (-12.,-2.)}]


opt = GPyOpt.methods.BayesianOptimization(f = fit_svr_val,          # function to optimize
                                          domain = domain,          # box-constraints of the problem
                                          acquisition_type = 'LCB', # LCB acquisition
                                          acquisition_weight = 0.1) # exploration-exploitation trade-off

# it may take a few seconds
opt.run_optimization(max_iter=50)
opt.plot_convergence()

x_best = np.exp(opt.X[np.argmin(opt.Y)])
print("The best parameters obtained: C="+str(x_best[0])+", epsilon="+str(x_best[1])+", gamma="+str(x_best[2]))
svr = svm.SVR(C=x_best[0], epsilon=x_best[1], gamma=x_best[2])

## Baseline without BO (uncomment to compare against the default parameters)
# svr = svm.SVR()
# print("The default parameters: C="+str(svr.C)+", epsilon="+str(svr.epsilon)+", gamma="+str(svr.gamma))

svr.fit(X_train,Y_train)
Y_train_pred = svr.predict(X_train)
Y_test_pred = svr.predict(X_test)

plot(X_train,Y_train_pred,'b',label='pred-train')
plot(X_test,Y_test_pred,'g',label='pred-test')
plot(X_train,Y_train,'rx',label='ground truth')
plot(X_test,Y_test,'rx')
legend(loc='best')
print("RMSE = "+str(np.sqrt(np.square(Y_test_pred-Y_test).mean())))

GPyOpt: configuring Scikit-learn methods

E. Integrating the model hyperparameters

GPyOpt: Integrating the model hyperparameters
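
A minimal sketch, assuming the Forrester setup (f_true, bounds) from example A: the GP_MCMC model type, paired with a matching *_MCMC acquisition, integrates out the GP hyperparameters by MCMC instead of fixing them at point estimates:

myBopt_mcmc = GPyOpt.methods.BayesianOptimization(f=f_true.f,
                                                  domain=bounds,
                                                  model_type='GP_MCMC',        # marginalize hyperparameters via MCMC
                                                  acquisition_type='EI_MCMC',  # EI averaged over hyperparameter samples
                                                  exact_feval=True)
myBopt_mcmc.run_optimization(max_iter=10)
myBopt_mcmc.plot_convergence()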
