贴几段结构化寻优代码片断

数据挖掘与机器学习 fireling 853℃

代码片断1:寻优参数空间


# Structured (conditional) search space: each nesting level is a choice, and
# each leaf maps a hyperparameter name to its [lower, upper] search interval.
search = {
    'algorithm': {
        # k-nearest neighbours
        'k-nn': {
            'n_neighbors': [5, 50],
        },
        # random forest
        'random-forest': {
            'n_estimators': [10, 50],
            'max_features': [0.4, 1.0],
        },
        # support vector machine; the tunable parameters depend on the kernel
        'SVM': {
            'kernel': {
                'linear': {'C': [0, 2]},
                'rbf': {'gamma': [0, 1], 'C': [0, 10]},
                'poly': {'degree': [2, 5], 'C': [0, 50], 'coef0': [0, 1]},
            },
        },
    },
}

代码片断2:寻优策略及寻优目标


def choose_model(algorithm, n_neighbors=None, n_estimators=None, max_features=None,
                 kernel=None, C=None, gamma=None, degree=None, coef0=None):
    """Build the unfitted classifier described by one search-space point.

    Only the hyperparameters relevant to the selected ``algorithm`` (and, for
    the SVM, the selected ``kernel``) are read; the others may stay ``None``.

    Args:
        algorithm: One of ``'k-nn'``, ``'random-forest'`` or ``'SVM'``.
        n_neighbors: Neighbour count for k-NN; truncated to ``int``.
        n_estimators: Tree count for the random forest; truncated to ``int``.
        max_features: ``max_features`` setting for the random forest.
        kernel: SVM kernel: ``'linear'``, ``'poly'`` or ``'rbf'``.
        C: SVM regularisation parameter.
        gamma: Kernel coefficient, used by the ``'rbf'`` kernel.
        degree: Polynomial degree for the ``'poly'`` kernel; truncated to ``int``.
        coef0: Independent kernel term for the ``'poly'`` kernel.

    Returns:
        A new ``KNeighborsClassifier``, ``RandomForestClassifier`` or ``SVC``.

    Raises:
        ValueError: If ``algorithm`` or the SVM ``kernel`` is not supported.
    """
    if algorithm == 'k-nn':
        return KNeighborsClassifier(n_neighbors=int(n_neighbors))

    if algorithm == 'random-forest':
        return RandomForestClassifier(n_estimators=int(n_estimators),
                                      max_features=max_features)

    if algorithm == 'SVM':
        if kernel == 'linear':
            return SVC(kernel=kernel, C=C)
        if kernel == 'poly':
            # The polynomial degree must be an integer, but a value drawn
            # from a continuous search interval is generally fractional, so
            # cast it like the other integer-valued hyperparameters.
            return SVC(kernel=kernel, C=C, degree=int(degree), coef0=coef0)
        if kernel == 'rbf':
            return SVC(kernel=kernel, C=C, gamma=gamma)
        # Fail here instead of returning None, which would only surface
        # later as an AttributeError when the caller tries to fit the model.
        raise ValueError('unsupported SVM kernel: %r' % (kernel,))

    raise ValueError('unsupported algorithm: %r' % (algorithm,))

def my_performance(X_train, y_train, X_test, y_test,
                   algorithm, n_neighbors=None, n_estimators=None, max_features=None,
                   kernel=None, C=None, gamma=None, degree=None, coef0=None):
    """Score one hyperparameter configuration (the objective to maximise).

    Builds the classifier via ``choose_model``, fits it on the training split
    and returns its accuracy on the test split.

    Args:
        X_train, y_train: Data the classifier is fitted on.
        X_test, y_test: Data the fitted classifier is scored on.
        algorithm, n_neighbors, n_estimators, max_features, kernel, C, gamma,
            degree, coef0: Forwarded unchanged to ``choose_model``.

    Returns:
        The value of ``accuracy_score(y_test, predictions)``.
    """
    classifier = choose_model(
        algorithm,
        n_neighbors=n_neighbors,
        n_estimators=n_estimators,
        max_features=max_features,
        kernel=kernel,
        C=C,
        gamma=gamma,
        degree=degree,
        coef0=coef0,
    )
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    return accuracy_score(y_test, predictions)

代码片断3:执行寻优并输出最优参数


# Run the structured search. The solver is left at the default, which the
# original note gives as 'particle swarm'.
best_params, info, _ = optunity.maximize_structured(
    # Bind the data splits so only hyperparameters remain as free arguments.
    functools.partial(
        my_performance,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    ),
    search_space=search,
    num_evals=num_evals,
    # Evaluations are dispatched through a pmap built for
    # `number_of_processes`. Alternatives: optunity.pmap (parallel map using
    # multiprocessing) or a pre-built `pmap` object.
    pmap=create_pmap(number_of_processes),
)

# Tabulate every evaluation, ordered from highest to lowest objective value.
df = optunity.call_log2dataframe(info.call_log)
df = df.sort_values('value', ascending=False)

print(best_params)
print(info.optimum)
# To inspect the full evaluation log, also run: print(df)

# Instantiate the (unfitted) estimator for the best configuration found.
base_estimator = choose_model(**best_params)

 

转载请注明:宁哥的小站 » 贴几段结构化寻优代码片断

喜欢 (6)