June 27, 2021

Python h2o package (Deep Learning Library)

h2o package in Python

import h2o
# Two ways to call h2o.init: with no arguments, or with an explicit ip and port.
h2o.init()
h2o.init(ip="192.168.1.10", port=54321)
# Cluster information and listing calls; neither takes arguments here.
h2o.cluster_info()
h2o.ls()
# Remove the object stored under key "py_2", then fetch the frame stored under
# key "py_7" and call head() on it.
# NOTE(review): "py_2" and "py_7" are example keys; presumably they must match
# keys that exist on the cluster (h2o.ls() output) -- confirm before running.
h2o.remove("py_2")
some_frame = h2o.get_frame("py_7")
some_frame.head()

# import_file variants: a relative local path, an hdfs:// URL, a per-column
# type override via col_types, and an import with parse=False.
df = h2o.import_file(path="smalldata/logreg/prostate.csv")
trainFrame = h2o.import_file(path="hdfs://192.168.1.10/user/data/data_test.csv")
fr = h2o.import_file("smalldata/logreg/prostate.csv", col_types = {"CAPSULE":"Enum"})
fraw = h2o.import_file("smalldata/logreg/prostate.csv", parse = False)
# NOTE(review): submission_dataframe is not defined anywhere in this snippet;
# it has to be created by the caller before this export call.
h2o.export_file(submission_dataframe, path ="submission_h20_3.csv")
fr.describe()
# Two-step route for the parse=False import: build a setup object from fraw,
# then parse with it. This rebinds fr, replacing the frame imported above.
fsetup = h2o.parse_setup(fraw)
fr = h2o.parse_raw(fsetup)

from h2o.estimators.glm import H2OGeneralizedLinearEstimator
# Two GLM estimators: one with family="binomial", one with constructor defaults.
m = H2OGeneralizedLinearEstimator(family="binomial")
model = H2OGeneralizedLinearEstimator()
# Fit m on every column from index 2 onward, with "CAPSULE" as the response.
m.train(x=fr.names[2:], y="CAPSULE", training_frame=fr)
m.__class__
# Query the estimator that was actually trained. `model` above is only
# constructed and never passed through train() in this snippet.
m.coef()
m.null_deviance()
m.residual_deviance()
# NOTE(review): train() above receives no validation_frame, so the valid=True
# variant presumably has no validation metrics to report -- confirm, or pass a
# validation_frame when training.
m.null_deviance(valid=True)

from h2o.estimators.gbm import H2OGradientBoostingEstimator
# GBM estimator configured with ntrees=10 and max_depth=5.
m = H2OGradientBoostingEstimator(ntrees=10, max_depth=5)
# Every column except "label" is a predictor; the comprehension keeps the
# frame's column order instead of the arbitrary order of a set difference.
m.train(x=[name for name in train.names if name != "label"], y="label", training_frame=train, validation_frame=test)
# Predict with the estimator trained on the line above.
preds = m.predict(test)
# Function-style builder, kept as in the original cheat sheet.
# NOTE(review): confirm h2o.gbm and varimp(return_list=...) exist in the
# installed h2o version; they look like an older API surface.
model = h2o.gbm(y = "IsDepDelayed", x = ["Month"], training_frame = df)
vi = model.varimp(return_list=True)

from h2o.estimators.deeplearning import H2ODeepLearningEstimator
# H2OGridSearch is used further down in this snippet, so bind the name here
# before its first use.
from h2o.grid.grid_search import H2OGridSearch

# Estimator with constructor defaults.
m = H2ODeepLearningEstimator()
# Estimator with explicit settings: three hidden layers (100, 200, 100),
# the "RectifierWithDropout" activation, input dropout, L1 and 100 epochs.
model = H2ODeepLearningEstimator(
        distribution="multinomial",
        activation="RectifierWithDropout",
        hidden=[100,200,100],
        input_dropout_ratio=0.2,
        sparse=True,
        l1=1e-5,
        epochs=100)
# Column 0 of `train` is the response and columns 1..784 are the predictors;
# fitting uses the train_split / valid_split frames.
# NOTE(review): train, train_split and valid_split are not defined in this
# snippet; the caller has to supply them.
model.train(
        x= train.names[1:785],
        y=train.names[0],
        training_frame=train_split,
        validation_frame=valid_split)
model.params
# Grid search over the deep learning estimator class.
# NOTE(review): hyper_parameters, x, y, valid and test are not defined in this
# snippet; the caller has to supply them.
model_grid = H2OGridSearch(H2ODeepLearningEstimator, hyper_params=hyper_parameters)
m.train(x=train.names[2:], y=train.names[1], training_frame=train, validation_frame=valid)
model_grid.train(x=x, y=y, distribution="multinomial", epochs=1000, training_frame=train, validation_frame=test, score_interval=2, stopping_rounds=3, stopping_tolerance=0.05, stopping_metric="misclassification")

# Display the estimator currently bound to m.
m.show()
# model_performance called three ways: no arguments, valid=True, and with an
# explicit test_data frame.
# NOTE(review): `test` is not defined in this snippet; the caller supplies it.
m.model_performance()
m.model_performance(valid=True)
m.model_performance(test_data=test)
# mse and r2, each called without arguments and with valid=True.
m.mse()
m.mse(valid=True)
m.r2()
m.r2(valid=True)
# confusion_matrix: default call, metric passed by keyword, metric passed
# positionally.
m.confusion_matrix()
m.confusion_matrix(metrics="accuracy")
m.confusion_matrix("min_per_class_accuracy")
# Performance object for `model` on `test`; the two accessors below are called
# on that returned object rather than on the model itself.
metrics = model.model_performance(test_data=test)
metrics.null_degrees_of_freedom()
metrics.aic()

from h2o.grid.grid_search import H2OGridSearch
# Grid search over hyper_parameters for a GBM with distribution='multinomial'.
# NOTE(review): hyper_parameters, iris and test are not defined in this
# snippet; the caller has to supply them.
gs = H2OGridSearch(H2OGradientBoostingEstimator(distribution='multinomial'), hyper_parameters)
# Predictors are every column index except the last; the last column is the
# response. The indices are materialized as a list because range() is lazy in
# Python 3. The cross-validation keyword is `nfolds`, not `nfold`.
gs.train(x=list(range(0, iris.ncol-1)), y=iris.ncol-1, training_frame=iris, nfolds=10)
# NOTE(review): this predicts with whatever `model` is bound to, not with a
# model taken from the grid `gs` -- confirm that is intended.
pred = model.predict(test)

Related Python articles: PyTorch package in Python; networkx and nxviz packages in Python

No comments:

Post a Comment