Python 集成树算法与在线学习实战

Python 集成树算法与在线学习实战 | 极客日志

from sklearn.datasets import make_classification
from sklearn.metrics import classification_report, accuracy_score
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.grid_search import RandomizedSearchCV
from operator import itemgetter
import numpy as np

def get_data():
    """Generate the synthetic classification dataset used by this recipe.

    Returns:
        The ``(x, y)`` pair produced by ``make_classification``: ``x`` holds
        the features (independent variables) and ``y`` the class labels
        (dependent variable).
    """
    n_features = 30
    # Shares of the 30 columns: 60% informative, 10% redundant, 10% repeated.
    column_mix = {
        "n_informative": int(0.6 * n_features),
        "n_redundant": int(0.1 * n_features),
        "n_repeated": int(0.1 * n_features),
    }
    return make_classification(n_samples=500, n_features=n_features,
                               flip_y=0.03, random_state=7, **column_mix)

def build_forest(x, y, x_dev, y_dev):
    """Fit a 100-tree random forest and print train and dev accuracy.

    Args:
        x, y: features and labels the forest is fitted on.
        x_dev, y_dev: held-out features and labels used for the dev score.
    """
    forest = RandomForestClassifier(n_estimators=100)
    forest.fit(x, y)
    # Score the training data first, then the dev data.
    accuracies = tuple(
        accuracy_score(labels, forest.predict(features))
        for features, labels in ((x, y), (x_dev, y_dev))
    )
    print("Training Accuracy = %0.2f Dev Accuracy = %0.2f" % accuracies)

def search_parameters(x, y, x_dev, y_dev):
    """Run a randomized hyper-parameter search for a random forest.

    Twenty candidate settings are sampled and scored with 5-fold cross
    validation on ``x, y``; the outcome is reported through
    ``print_model_worth`` using ``x_dev, y_dev``.

    Returns:
        ``best_estimator_`` of the fitted search object.
    """
    n_candidates = 20
    root = int(np.sqrt(x.shape[1]))  # integer square root of the feature count
    search_space = {
        "n_estimators": np.random.randint(75, 200, n_candidates),
        "criterion": ["gini", "entropy"],
        "max_features": [root, root * 2, root * 3, root + 10],
    }
    search = RandomizedSearchCV(
        estimator=RandomForestClassifier(),
        param_distributions=search_space,
        verbose=1,
        n_iter=n_candidates,
        random_state=77,
        n_jobs=-1,
        cv=5,
    )
    search.fit(x, y)
    print_model_worth(search, x_dev, y_dev)
    return search.best_estimator_

def print_model_worth(grid, x_dev, y_dev):
    """Print the five best search candidates and a dev-set report.

    Args:
        grid: fitted search object exposing ``grid_scores_`` and ``predict``.
        x_dev, y_dev: dev features and labels for the classification report.
    """
    # Entries are ordered by their second field, best first.
    ranked = sorted(grid.grid_scores_, key=itemgetter(1), reverse=True)
    for rank, entry in enumerate(ranked[:5], start=1):
        print("Model %d, Score = %0.3f" % (rank, entry.mean_validation_score))
        print("Parameters = {0}".format(entry.parameters))
    print(classification_report(y_dev, grid.predict(x_dev)))

if __name__ == "__main__":
    # Script entry point: build the data, split it, fit a baseline forest,
    # search for better hyper-parameters and list the top features.
    x, y = get_data()
    # Divide the data into Train, dev and test
    x_train, x_test_all, y_train, y_test_all = train_test_split(x, y, test_size=0.3, random_state=9)
    x_dev, x_test, y_dev, y_test = train_test_split(x_test_all, y_test_all, test_size=0.3, random_state=9)
    build_forest(x_train, y_train, x_dev, y_dev)
    # Search on the training split only: fitting on the full (x, y) would
    # include the dev rows the search result is evaluated on afterwards.
    model = search_parameters(x_train, y_train, x_dev, y_dev)
    get_feature_importance(model)

# --- Walkthrough excerpts (random forest) ----------------------------------
# The statements below repeat, at module level, lines that already appear in
# the __main__ block, build_forest, search_parameters and print_model_worth
# above.
# NOTE(review): they look like snippets quoted by the article text rather
# than a runnable script -- e.g. `parameters` reads `no_iterations` and
# `sqr_no_features` before they are assigned. Confirm before executing.

# Divide the data into Train, dev and test
x_train, x_test_all, y_train, y_test_all = train_test_split(x, y, test_size=0.3, random_state=9)

x_dev, x_test, y_dev, y_test = train_test_split(x_test_all, y_test_all, test_size=0.3, random_state=9)

build_forest(x_train, y_train, x_dev, y_dev)

# Body of build_forest: fit 100 trees, then score train and dev data.
no_trees = 100
estimator = RandomForestClassifier(n_estimators=no_trees)
estimator.fit(x, y)
train_predcited = estimator.predict(x)
train_score = accuracy_score(y, train_predcited)
dev_predicted = estimator.predict(x_dev)
dev_score = accuracy_score(y_dev, dev_predicted)
print("Training Accuracy = %0.2f Dev Accuracy = %0.2f" % (train_score, dev_score))

# Search space from search_parameters.
parameters = {
    "n_estimators": np.random.randint(75, 200, no_iterations),
    "criterion": ["gini", "entropy"],
    "max_features": [sqr_no_features, sqr_no_features * 2, sqr_no_features * 3, sqr_no_features + 10]
}

no_iterations = 20

# NOTE(review): `no_features` is a local of get_data / search_parameters and
# is not assigned at module level in the lines visible here.
sqr_no_features = int(np.sqrt(no_features))

grid = RandomizedSearchCV(estimator=estimator, param_distributions=parameters,
                          verbose=1, n_iter=no_iterations, random_state=77, n_jobs=-1, cv=5)

estimator = RandomForestClassifier()

grid.fit(x, y)
print_model_worth(grid, x_dev, y_dev)

# Body of print_model_worth: five best candidates, then the dev report.
scores = sorted(grid.grid_scores_, key=itemgetter(1), reverse=True)[0:5]

for model_no, score in enumerate(scores):
    print("Model %d, Score = %0.3f" % (model_no + 1, score.mean_validation_score))
    print("Parameters = {0}".format(score.parameters))
    # NOTE(review): a bare `print` is an expression with no effect under
    # Python 3; it prints an empty line only as a Python 2 statement.
    print

dev_predicted = grid.predict(x_dev)
print(classification_report(y_dev, dev_predicted))

def get_feature_importance(model):
    """Print the ten highest entries of ``model.feature_importances_``.

    Args:
        model: fitted estimator exposing a ``feature_importances_`` sequence.

    Each printed line carries the feature's position in that sequence and
    its importance; the listing ends with one empty line.
    """
    # Pair every importance with its index and order them from high to low.
    ranked = sorted(enumerate(model.feature_importances_),
                    key=itemgetter(1), reverse=True)
    print("Top 10 Features")
    for feature_id, importance in ranked[:10]:
        print("Feature %d importance = %0.3f" % (feature_id, importance))
    # A bare `print` does nothing with the print function; call it so the
    # trailing blank line is actually written.
    print()

# Walkthrough excerpt: the first statements of get_feature_importance
# repeated at module level.
# NOTE(review): `model` is only assigned inside the __main__ block above;
# these lines look like article snippets rather than script code -- confirm.
feature_importance = model.feature_importances_
fm_with_id = [(i, importance) for i, importance in enumerate(feature_importance)]

# Keep the ten largest importances.
fm_with_id = sorted(fm_with_id, key=itemgetter(1), reverse=True)[0:10]

from sklearn.datasets import make_classification
from sklearn.metrics import classification_report, accuracy_score
from sklearn.cross_validation import train_test_split, cross_val_score
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.grid_search import RandomizedSearchCV
from operator import itemgetter

def get_data():
    """Create the synthetic classification data for the extra-trees recipe.

    Returns:
        ``(x, y)`` from ``make_classification``: the feature matrix ``x``
        (independent variables) and the labels ``y`` (dependent variable).
    """
    total_columns = 30
    informative = int(0.6 * total_columns)
    redundant = int(0.1 * total_columns)
    repeated = int(0.1 * total_columns)
    dataset = make_classification(
        n_samples=500,
        n_features=total_columns,
        flip_y=0.03,
        n_informative=informative,
        n_redundant=redundant,
        n_repeated=repeated,
        random_state=7,
    )
    features, labels = dataset
    return features, labels

def build_forest(x, y, x_dev, y_dev):
    """Fit 100 extremely randomized trees and print their performance.

    Prints the accuracy on the fitting data and on the dev data, followed by
    the 5-fold ``cross_val_score`` of the estimator on the dev data.
    """
    trees = ExtraTreesClassifier(n_estimators=100, random_state=51)
    trees.fit(x, y)
    fit_accuracy = accuracy_score(y, trees.predict(x))
    dev_accuracy = accuracy_score(y_dev, trees.predict(x_dev))
    print("Training Accuracy = %0.2f Dev Accuracy = %0.2f" % (fit_accuracy, dev_accuracy))
    print("cross validated score")
    fold_scores = cross_val_score(trees, x_dev, y_dev, cv=5)
    print(fold_scores)

def search_parameters(x, y, x_dev, y_dev):
    """Run a randomized hyper-parameter search for extra trees.

    Twenty candidates are sampled and scored with 5-fold cross validation on
    ``x, y``; results are reported through ``print_model_worth`` with
    ``x_dev, y_dev``.

    Returns:
        ``best_estimator_`` of the fitted search object.
    """
    n_candidates = 20
    root = int(np.sqrt(x.shape[1]))  # integer square root of the feature count
    tree_counts = np.random.randint(75, 200, n_candidates)
    feature_limits = [root, root * 2, root * 3, root + 10]
    search = RandomizedSearchCV(
        estimator=ExtraTreesClassifier(),
        param_distributions={
            "n_estimators": tree_counts,
            "criterion": ["gini", "entropy"],
            "max_features": feature_limits,
        },
        verbose=1,
        n_iter=n_candidates,
        random_state=77,
        n_jobs=-1,
        cv=5,
    )
    search.fit(x, y)
    print_model_worth(search, x_dev, y_dev)
    return search.best_estimator_

if __name__ == "__main__":
    # Script entry point: build the data, split it, fit the baseline extra
    # trees model and run the hyper-parameter search.
    x, y = get_data()
    # Divide the data into Train, dev and test
    x_train, x_test_all, y_train, y_test_all = train_test_split(x, y, test_size=0.3, random_state=9)
    x_dev, x_test, y_dev, y_test = train_test_split(x_test_all, y_test_all, test_size=0.3, random_state=9)
    build_forest(x_train, y_train, x_dev, y_dev)
    # Search on the training split only: fitting on the full (x, y) would
    # include the dev rows the search result is evaluated on afterwards.
    model = search_parameters(x_train, y_train, x_dev, y_dev)

# --- Walkthrough excerpts (extra trees) -------------------------------------
# These module-level statements repeat lines from the __main__ block and
# from build_forest above.
# NOTE(review): they look like snippets quoted by the article text, not a
# script meant to run top to bottom -- confirm before executing.

# Divide the data into Train, dev and test
x_train, x_test_all, y_train, y_test_all = train_test_split(x, y, test_size=0.3, random_state=9)

x_dev, x_test, y_dev, y_test = train_test_split(x_test_all, y_test_all, test_size=0.3, random_state=9)

build_forest(x_train, y_train, x_dev, y_dev)

# Body of build_forest: fit, score train and dev data, cross-validate on dev.
no_trees = 100
estimator = ExtraTreesClassifier(n_estimators=no_trees, random_state=51)
estimator.fit(x, y)
train_predcited = estimator.predict(x)
train_score = accuracy_score(y, train_predcited)
dev_predicted = estimator.predict(x_dev)
dev_score = accuracy_score(y_dev, dev_predicted)
print("Training Accuracy = %0.2f Dev Accuracy = %0.2f" % (train_score, dev_score))
print("cross validated score")
print(cross_val_score(estimator, x_dev, y_dev, cv=5))

# The scoring statements once more, as quoted a second time.
train_predcited = estimator.predict(x)

train_score = accuracy_score(y, train_predcited)
dev_predicted = estimator.predict(x_dev)
dev_score = accuracy_score(y_dev, dev_predicted)

print("Training Accuracy = %0.2f Dev Accuracy = %0.2f" % (train_score, dev_score))

# Final lines of print_model_worth: report for the search object `grid`.
dev_predicted = grid.predict(x_dev)
print(classification_report(y_dev, dev_predicted))

from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from sklearn.cross_validation import train_test_split
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
import numpy as np

def get_data():
    """Create the 50-feature classification data for the rotation forest.

    Returns:
        ``(x, y)`` from ``make_classification``: the feature matrix ``x``
        (independent variables) and the labels ``y`` (dependent variable).
    """
    width = 50
    # (keyword, share of the 50 columns) for each special column group.
    shares = (("n_informative", 0.6), ("n_redundant", 0.1), ("n_repeated", 0.1))
    group_sizes = {keyword: int(share * width) for keyword, share in shares}
    features, labels = make_classification(n_samples=500, n_features=width,
                                           flip_y=0.03, random_state=7,
                                           **group_sizes)
    return features, labels

def get_random_subset(iterable, k):
    """Randomly partition the items of ``iterable`` into groups of size ``k``.

    The items are copied into a list, shuffled with ``np.random.shuffle`` and
    then cut off the end of that list ``k`` at a time. Every item ends up in
    exactly one group; when the item count is not a multiple of ``k`` the
    last group holds the remainder.

    Args:
        iterable: any iterable of items (a list or a ``range`` both work).
            It is not modified.
        k: group size, must be at least 1.

    Returns:
        A list of lists; empty when ``iterable`` has no items.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1, got %r" % (k,))
    # Work on a private list: shuffling and deleting in place needs a mutable
    # sequence and must not empty the caller's object.
    items = list(iterable)
    np.random.shuffle(items)
    subsets = []
    while items:
        # A slice from the end returns everything that is left when fewer
        # than k items remain, so the remainder is never dropped.
        subsets.append(items[-k:])
        del items[-k:]
    return subsets

def build_rotationtree_model(x_train, y_train, d, k):
    """Fit ``d`` decision trees, each on its own transformed copy of the data.

    For every tree the training part of a 70/30 split of ``x_train`` is
    taken, the feature indices are grouped at random into subsets of size
    ``k``, PCA is fitted on each subset of columns, and the component values
    are written into a square matrix. The full ``x_train`` is multiplied by
    that matrix before the tree is fitted.

    Args:
        x_train: training feature matrix (indexed as ``x[:, columns]``).
        y_train: training labels.
        d: number of trees to build.
        k: number of features per subset.

    Returns:
        ``(models, r_matrices, feature_subsets)``: the fitted trees, the
        matrix used for each tree, and the feature subsets used for each
        tree, in matching order.
    """
    models = []
    r_matrices = []
    feature_subsets = []
    for _ in range(d):
        # NOTE(review): random_state is fixed, so the same rows are selected
        # for every tree; only the feature subsets differ between trees.
        x = train_test_split(x_train, y_train, test_size=0.3, random_state=7)[0]
        # Features ids -- a real list, because get_random_subset shuffles and
        # deletes in place, which a Python 3 range object does not support.
        feature_index = list(range(x.shape[1]))
        # Get subsets of features
        random_k_subset = get_random_subset(feature_index, k)
        feature_subsets.append(random_k_subset)
        # Rotation matrix
        R_matrix = np.zeros((x.shape[1], x.shape[1]), dtype=float)
        for each_subset in random_k_subset:
            pca = PCA()
            x_subset = x[:, each_subset]
            pca.fit(x_subset)
            # Copy the component values into the block of the matrix that
            # belongs to this subset's feature indices.
            n_components = len(pca.components_)
            for ii in range(n_components):
                for jj in range(n_components):
                    R_matrix[each_subset[ii], each_subset[jj]] = pca.components_[ii, jj]
        x_transformed = x_train.dot(R_matrix)
        model = DecisionTreeClassifier()
        model.fit(x_transformed, y_train)
        models.append(model)
        r_matrices.append(R_matrix)
    return models, r_matrices, feature_subsets

def model_worth(models, r_matrices, x, y):
    """Print a classification report for the majority vote of all trees.

    Args:
        models: fitted trees.
        r_matrices: the matrix paired with each tree, in the same order.
        x: feature matrix to predict on.
        y: true labels for ``x``.
    """
    # One row of predictions per tree, each made on that tree's own
    # transformed copy of x.
    votes = np.asmatrix([
        tree.predict(x.dot(r_matrices[position]))
        for position, tree in enumerate(models)
    ])
    final_prediction = []
    for sample in range(len(y)):
        sample_votes = np.ravel(votes[:, sample])
        non_zero_votes = len(np.nonzero(sample_votes)[0])
        # A sample is predicted True when more than half of the trees
        # returned a non-zero label for it.
        final_prediction.append(non_zero_votes > len(models) / 2)
    print(classification_report(y, final_prediction))

if __name__ == "__main__":
    # Script entry point for the rotation forest recipe.
    x, y = get_data()
    # Divide the data into Train, dev and test
    x_train, x_test_all, y_train, y_test_all = train_test_split(x, y, test_size=0.3, random_state=9)
    x_dev, x_test, y_dev, y_test = train_test_split(x_test_all, y_test_all, test_size=0.3, random_state=9)
    # Build a bag of models
    # 25 is passed as d (number of trees) and 5 as k (features per subset).
    models, r_matrices, features = build_rotationtree_model(x_train, y_train, 25, 5)
    # Report on the training data first, then on the dev data.
    model_worth(models, r_matrices, x_train, y_train)
    model_worth(models, r_matrices, x_dev, y_dev)

# --- Walkthrough excerpts (rotation forest) ---------------------------------
# These module-level statements repeat lines from the __main__ block,
# build_rotationtree_model, get_random_subset and model_worth above.
# NOTE(review): they look like snippets quoted by the article text rather
# than a runnable script -- several names (`k`, `iterable`, `iteration`,
# `subsets`, `predicted_ys`) are only locals of those functions. Confirm
# before executing.

# Divide the data into Train, dev and test
x_train, x_test_all, y_train, y_test_all = train_test_split(x, y, test_size=0.3, random_state=9)

x_dev, x_test, y_dev, y_test = train_test_split(x_test_all, y_test_all, test_size=0.3, random_state=9)

models, r_matrices, features = build_rotationtree_model(x_train, y_train, 25, 5)

# Start of build_rotationtree_model: the three result lists.
models = []
r_matrices = []
feature_subsets = []

x, _, _, _ = train_test_split(x_train, y_train, test_size=0.3, random_state=7)

# Features ids
feature_index = range(x.shape[1])
# Get subsets of features
random_k_subset = get_random_subset(feature_index, k)
feature_subsets.append(random_k_subset)

# Lines from get_random_subset.
np.random.shuffle(iterable)

limit = len(iterable) / k
while iteration < limit:
    if k <= len(iterable):
        subset = k
    else:
        subset = len(iterable)
    iteration += 1

subsets.append(iterable[-subset:])

del iterable[-subset:]

# Rotation matrix
R_matrix = np.zeros((x.shape[1], x.shape[1]), dtype=float)

for each_subset in random_k_subset:
    pca = PCA()
    x_subset = x[:, each_subset]
    pca.fit(x_subset)

for ii in range(0, len(pca.components_)):
    for jj in range(0, len(pca.components_)):
        R_matrix[each_subset[ii], each_subset[jj]] = pca.components_[ii, jj]

# NOTE(review): the next line looks like residue of article prose
# ("2, 4, 6 and 1, 3, 5"), not code -- confirm and remove if so.
2,4,6and1,3,5

R_matrix[each_subset[ii], each_subset[jj]] = pca.components_[ii, jj]

x_transformed = x_train.dot(R_matrix)

model = DecisionTreeClassifier()
model.fit(x_transformed, y_train)

models.append(model)
r_matrices.append(R_matrix)

model_worth(models, r_matrices, x_train, y_train)
model_worth(models, r_matrices, x_dev, y_dev)

# Body of model_worth: one prediction row per tree, then a majority vote.
for i, model in enumerate(models):
    x_mod = x.dot(r_matrices[i])
    predicted_y = model.predict(x_mod)
    predicted_ys.append(predicted_y)

predicted_matrix = np.asmatrix(predicted_ys)

final_prediction = []
for i in range(len(y)):
    pred_from_all_models = np.ravel(predicted_matrix[:, i])
    non_zero_pred = np.nonzero(pred_from_all_models)[0]
    is_one = len(non_zero_pred) > len(models) / 2
    final_prediction.append(is_one)

print(classification_report(y, final_prediction))

from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from sklearn.preprocessing import scale
import numpy as np

def get_data(batch_size):
    """Yield ``batch_size`` batches of scaled classification data.

    Each batch is an ``(x, y)`` pair: ``x`` is the feature matrix after
    ``scale(with_mean=True, with_std=True)`` and ``y`` holds the labels with
    every value below 1 replaced by -1.

    Every batch is generated with the same arguments, including
    ``random_state=51``.
    """
    width = 30
    informative = int(0.8 * width)
    redundant = int(0.1 * width)
    repeated = int(0.1 * width)
    emitted = 0
    while emitted < batch_size:
        features, labels = make_classification(
            n_samples=1000,
            n_features=width,
            flip_y=0.03,
            n_informative=informative,
            n_redundant=redundant,
            n_repeated=repeated,
            random_state=51,
        )
        # Recode the labels for the perceptron: anything below 1 becomes -1.
        labels[labels < 1] = -1
        features = scale(features, with_mean=True, with_std=True)
        yield features, labels
        emitted += 1

def build_model(x, y, weights, epochs, alpha=0.5):
    """Train a simple perceptron and return the updated weight vector.

    Args:
        x: 2-D NumPy array of features, one row per instance.
        y: NumPy array of labels, compared against ``np.sign`` of the
            weighted sum (so -1 / +1 labels are expected).
        weights: starting weight vector with one entry per feature column.
        epochs: number of passes over the data.
        alpha: learning rate applied to every correction.

    Returns:
        The weight vector after ``epochs`` passes. The ``weights`` argument
        itself is not modified.
    """
    labels = np.ravel(y)
    for _ in range(epochs):
        # Shuffle the dataset. np.random.shuffle works in place and returns
        # None, so draw a permutation of the row indices instead and apply
        # the same order to features and labels.
        order = np.random.permutation(len(labels))
        x_train = x[order, :]
        y_train = labels[order]
        # Build weights one instance at a time
        for row, target in zip(x_train, y_train):
            prediction = np.sign(np.sum(row * weights))
            if prediction != target:
                # Misclassified: move the weights towards the instance.
                weights = weights + alpha * (target * row)
    return weights

def model_worth(x, y, weights):
    """Print a classification report for the perceptron ``weights`` on ``x``."""
    # Weighted sum per row; its sign is the predicted label.
    activations = np.sum(x * weights, axis=1)
    predicted_labels = np.sign(activations)
    report = classification_report(y, predicted_labels)
    print(report)

if __name__ == "__main__":
    # Script entry point: feed ten batches to the perceptron one at a time,
    # carrying the weights over from batch to batch.
    data = get_data(10)
    # next(data) instead of data.next(): generators have no .next() method
    # in Python 3.
    x, y = next(data)
    weights = np.zeros(x.shape[1])
    for i in range(10):
        epochs = 100
        weights = build_model(x, y, weights, epochs)
        # print() rather than a bare `print`, which does nothing with the
        # print function.
        print()
        print("Model worth after receiving dataset batch %d" % (i + 1))
        model_worth(x, y, weights)
        print()
        # The generator yields exactly ten batches; do not ask for an
        # eleventh after the last iteration.
        if i < 9:
            x, y = next(data)

# --- Walkthrough excerpts (perceptron) --------------------------------------
# These module-level statements repeat lines from the __main__ block,
# get_data and build_model above.
# NOTE(review): they look like snippets quoted by the article text rather
# than a runnable script -- e.g. the `def build_model(...)` line below has no
# colon or body, and `no_features`, `alpha`, `index` are only locals of the
# functions above. Confirm before executing.
data = get_data(10)

x, y = data.next()

# Body of get_data: generate the data and recode labels below 1 as -1.
x, y = make_classification(n_samples=1000, n_features=no_features, flip_y=0.03,
                           n_informative=informative_features, n_redundant=redundant_features,
                           n_repeated=repeated_features, random_state=51)

y_indx = y < 1
y[y_indx] = -1

weights = np.zeros(x.shape[1])

for i in range(10):
    epochs = 100
    weights = build_model(x, y, weights, epochs)

def build_model(x, y, weights, epochs, alpha=0.5)

# Shuffle the dataset
shuff_index = np.random.shuffle(range(len(y)))
x_train = x[shuff_index, :].reshape(x.shape)
y_train = np.ravel(y[shuff_index, :])

# Build weights one instance at a time
for index in range(len(y)):
    prediction = np.sign(np.sum(x_train[index, :] * weights))
    if prediction != y_train[index]:
        weights = weights + alpha * (y_train[index] * x_train[index, :])

# The prediction and the weight correction, quoted individually.
prediction = np.sign(np.sum(x_train[index, :] * weights))

weights = weights + alpha * (y_train[index] * x_train[index, :])

print
print("Model worth after receiving dataset batch %d" % (i + 1))
model_worth(x, y, weights)
print

def get_data(batch_size):
    """Yield ``batch_size`` batches of polynomially expanded data.

    Each batch is an ``(x, y)`` pair: ``x`` is the generated feature matrix
    after a degree-2 ``PolynomialFeatures`` transform and ``y`` holds the
    labels with every value below 1 replaced by -1.

    Every batch is generated with the same arguments, including
    ``random_state=51``.
    """
    # Imported here because the file's import lines bring in only `scale`
    # from sklearn.preprocessing; without this the name is undefined.
    from sklearn.preprocessing import PolynomialFeatures

    b_size = 0
    no_features = 30
    redundant_features = int(0.1 * no_features)
    informative_features = int(0.8 * no_features)
    repeated_features = int(0.1 * no_features)
    poly = PolynomialFeatures(degree=2)
    while b_size < batch_size:
        x, y = make_classification(n_samples=1000, n_features=no_features, flip_y=0.03,
                                   n_informative=informative_features, n_redundant=redundant_features,
                                   n_repeated=repeated_features, random_state=51)
        # Recode the labels for the perceptron: anything below 1 becomes -1.
        y_indx = y < 1
        y[y_indx] = -1
        x = poly.fit_transform(x)
        yield x, y
        b_size += 1

from sklearn.datasets import make_regression
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.cross_validation import train_test_split

def get_data():
    """Create the synthetic regression dataset for the SGD recipe.

    Returns:
        ``(x, y)`` from ``make_regression``: the feature matrix ``x``
        (independent variables) and the target values ``y`` (dependent
        variable).
    """
    features, targets = make_regression(
        n_samples=1000,
        n_features=30,
        random_state=51,
    )
    return features, targets

def build_model(x, y):
    """Fit an unregularized ``SGDRegressor`` on ``x, y`` and return it."""
    # Squared loss, constant learning rate of 0.01, ten iterations, no penalty.
    settings = dict(
        n_iter=10,
        shuffle=True,
        loss="squared_loss",
        learning_rate='constant',
        eta0=0.01,
        fit_intercept=True,
        penalty='none',
    )
    regressor = SGDRegressor(**settings)
    regressor.fit(x, y)
    return regressor

def model_worth(model, x, y):
    """Print the mean absolute and mean squared error of ``model`` on ``x, y``."""
    estimates = model.predict(x)
    # (message template, metric function), evaluated and printed in order.
    report_lines = (
        ("\nMean absolute error = %0.2f", mean_absolute_error),
        ("Mean squared error = %0.2f", mean_squared_error),
    )
    for template, metric in report_lines:
        print(template % metric(y, estimates))

def inspect_model(model):
    """Print the intercept and the numbered coefficients of ``model``.

    Args:
        model: fitted estimator exposing ``intercept_`` and an iterable
            ``coef_``.

    The intercept line is followed by one empty line; coefficients are
    numbered from 1 and shown with three decimals.
    """
    print("\nModel Intercept {0}".format(model.intercept_))
    # A bare `print` does nothing with the print function; call it so the
    # separating blank line is actually written.
    print()
    for number, coef in enumerate(model.coef_, start=1):
        print("Coefficient {0} = {1:.3f}".format(number, coef))

if __name__ == "__main__":
    # Script entry point for the SGD regression recipe.
    x, y = get_data()
    # Divide the data into Train, dev and test
    x_train, x_test_all, y_train, y_test_all = train_test_split(x, y, test_size=0.3, random_state=9)
    x_dev, x_test, y_dev, y_test = train_test_split(x_test_all, y_test_all, test_size=0.3, random_state=9)
    # Unregularized model: show its parameters, then its errors on both splits.
    model = build_model(x_train, y_train)
    inspect_model(model)
    print("Model worth on train data")
    model_worth(model, x_train, y_train)
    print("Model worth on dev data")
    model_worth(model, x_dev, y_dev)
    # Building model with l2 regularization
    # NOTE(review): build_model_regularized is defined further down in this
    # file; if the file runs top to bottom as one script the name does not
    # exist yet when this line executes -- confirm the intended ordering.
    model = build_model_regularized(x_train, y_train)
    inspect_model(model)

# --- Walkthrough excerpts (SGD regression) ----------------------------------
# These module-level statements repeat lines from the __main__ block,
# get_data and build_model above.
# NOTE(review): they look like snippets quoted by the article text, not a
# script meant to run top to bottom -- confirm before executing.
x, y = get_data()

# Body of get_data.
no_features = 30
x, y = make_regression(n_samples=1000, n_features=no_features,
                       random_state=51)

# Divide the data into Train, dev and test
x_train, x_test_all, y_train, y_test_all = train_test_split(x, y, test_size=0.3, random_state=9)

x_dev, x_test, y_dev, y_test = train_test_split(x_test_all, y_test_all, test_size=0.3, random_state=9)

model = build_model(x_train, y_train)

# Body of build_model: the unregularized SGD regressor.
estimator = SGDRegressor(n_iter=10, shuffle=True, loss="squared_loss",
                         learning_rate='constant', eta0=0.01, fit_intercept=True,
                         penalty='none')
estimator.fit(x, y)

inspect_model(model)

print("Model worth on train data")
model_worth(model, x_train, y_train)

def build_model_regularized(x, y):
    """Fit an L2-penalized ``SGDRegressor`` (``alpha=0.01``) and return it."""
    # Same settings as the unpenalized recipe model, plus the L2 penalty.
    settings = dict(
        n_iter=10,
        shuffle=True,
        loss="squared_loss",
        learning_rate='constant',
        eta0=0.01,
        fit_intercept=True,
        penalty='l2',
        alpha=0.01,
    )
    regressor = SGDRegressor(**settings)
    regressor.fit(x, y)
    return regressor

# Walkthrough excerpt: the regularized model as used in the __main__ block,
# followed by the estimator construction from build_model_regularized.
# NOTE(review): looks like an article snippet; `x_train` / `y_train` are only
# assigned by earlier top-level lines -- confirm before executing.
model = build_model_regularized(x_train, y_train)
inspect_model(model)

estimator = SGDRegressor(n_iter=10, shuffle=True, loss="squared_loss",
                         learning_rate='constant', eta0=0.01, fit_intercept=True,
                         penalty='l2', alpha=0.01)

from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.cross_validation import train_test_split
from sklearn.linear_model import SGDClassifier
import numpy as np

def get_data():
    """Create the 1000-sample classification data for the SGD classifier.

    Returns:
        ``(x, y)`` from ``make_classification``: the feature matrix ``x``
        (independent variables) and the labels ``y`` (dependent variable).
    """
    n_columns = 30
    one_tenth = int(0.1 * n_columns)  # used for both redundant and repeated
    features, labels = make_classification(
        n_samples=1000,
        n_features=n_columns,
        flip_y=0.03,
        n_informative=int(0.6 * n_columns),
        n_redundant=one_tenth,
        n_repeated=one_tenth,
        random_state=7,
    )
    return features, labels

def build_model(x, y, x_dev, y_dev):
    """Fit a logistic-loss ``SGDClassifier`` and print train and dev accuracy.

    Args:
        x, y: features and labels the classifier is fitted on.
        x_dev, y_dev: held-out features and labels used for the dev score.

    An empty line is printed before the accuracy line.
    """
    estimator = SGDClassifier(n_iter=50, shuffle=True, loss="log",
                              learning_rate="constant", eta0=0.0001, fit_intercept=True,
                              penalty="none")
    estimator.fit(x, y)
    train_score = accuracy_score(y, estimator.predict(x))
    dev_score = accuracy_score(y_dev, estimator.predict(x_dev))
    # A bare `print` does nothing with the print function; call it so the
    # blank line is actually written.
    print()
    print("Training Accuracy = %0.2f Dev Accuracy = %0.2f" % (train_score, dev_score))

if __name__ == "__main__":
    # Script entry point for the SGD classification recipe.
    x, y = get_data()
    # Divide the data into Train, dev and test
    x_train, x_test_all, y_train, y_test_all = train_test_split(x, y, test_size=0.3, random_state=9)
    x_dev, x_test, y_dev, y_test = train_test_split(x_test_all, y_test_all, test_size=0.3, random_state=9)
    # Fit on the training split and report accuracy on train and dev.
    build_model(x_train, y_train, x_dev, y_dev)

def get_data():
    """Create a 500-sample synthetic classification dataset.

    Returns:
        ``(x, y)`` from ``make_classification``: the feature matrix ``x``
        (independent variables) and the labels ``y`` (dependent variable).
    """
    n_columns = 30
    generator_options = dict(
        n_samples=500,
        n_features=n_columns,
        flip_y=0.03,
        n_informative=int(0.6 * n_columns),
        n_redundant=int(0.1 * n_columns),
        n_repeated=int(0.1 * n_columns),
        random_state=7,
    )
    features, labels = make_classification(**generator_options)
    return features, labels

# --- Walkthrough excerpts (SGD classification) ------------------------------
# These module-level statements repeat lines from the __main__ block and
# from build_model above; the last statement shows the same estimator with
# learning_rate="invscaling" and eta0=0.001.
# NOTE(review): they look like snippets quoted by the article text, not a
# script meant to run top to bottom -- confirm before executing.

# Divide the data into Train, dev and test
x_train, x_test_all, y_train, y_test_all = train_test_split(x, y, test_size=0.3, random_state=9)

x_dev, x_test, y_dev, y_test = train_test_split(x_test_all, y_test_all, test_size=0.3, random_state=9)

build_model(x_train, y_train, x_dev, y_dev)

estimator = SGDClassifier(n_iter=50, shuffle=True, loss="log",
                          learning_rate="constant", eta0=0.0001, fit_intercept=True,
                          penalty="none")

estimator.fit(x, y)
train_predcited = estimator.predict(x)
train_score = accuracy_score(y, train_predcited)
dev_predicted = estimator.predict(x_dev)
dev_score = accuracy_score(y_dev, dev_predicted)
# NOTE(review): a bare `print` has no effect under Python 3.
print
print("Training Accuracy = %0.2f Dev Accuracy = %0.2f" % (train_score, dev_score))

estimator = SGDClassifier(n_iter=50, shuffle=True, loss="log",
                          learning_rate="invscaling", eta0=0.001, fit_intercept=True,
                          penalty="none")

Python 集成树算法与在线学习实战

第九章：生长树

介绍

从树到森林——随机森林

准备开始

如何操作…

它是如何工作的…

还有更多…

生长极端随机树

准备好了…

如何做到这一点…

它是如何工作的…

还有更多内容……

旋转森林的生长

准备工作…

如何实现…

它是如何工作的…

还有更多内容…

第十章 大规模机器学习 – 在线学习

引言

使用感知机作为在线学习算法

准备工作

如何实现…

工作原理…

还有更多…

使用随机梯度下降进行回归

准备工作

如何操作…

工作原理…

还有更多…

使用随机梯度下降进行分类

准备工作

如何实现……

它是如何工作的……

还有更多内容……

更多推荐文章

相关免费在线工具

Python 集成树算法与在线学习实战

第九章：生长树

介绍

从树到森林——随机森林

准备开始

如何操作…

它是如何工作的…

还有更多…

生长极端随机树

准备好了…

如何做到这一点…

它是如何工作的…

还有更多内容……

旋转森林的生长

准备工作…

如何实现…

它是如何工作的…

还有更多内容…

第十章 大规模机器学习 – 在线学习

引言

使用感知机作为在线学习算法

准备工作

如何实现…

工作原理…

还有更多…

使用随机梯度下降进行回归

准备工作

如何操作…

工作原理…

还有更多…

使用随机梯度下降进行分类

准备工作

如何实现……

它是如何工作的……

还有更多内容……

微信扫一扫，关注极客日志

更多推荐文章

相关免费在线工具

第十章 大规模机器学习 – 在线学习