Note
Go to the end to download the full example code.
LinGAM
https://www.kaggle.com/code/sasakitetsuya/water-quality-causal-inference-by-lingam
https://github.com/KJMAN678/casual_reasoning_lignum/blob/master/LIGNAM.ipynb
from IPython.display import Image
import lingam
from lingam.utils import make_dot, make_prior_knowledge
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from utils import prepare_data
# Load the data; prepare_data() returns three values and only the first
# (the DataFrame) is used in this script.
df, _, _ = prepare_data()

# Map every column name to its positional index so that the prior-knowledge
# constraints can refer to variables by name rather than by number.
df_list = df.columns.to_list()
df_dict = {column: i for i, column in enumerate(df_list)}
# First prior-knowledge matrix: the variables at positions 0-13 are declared
# exogenous and 'Efficiency (%)' is declared a sink.  No `paths` constraints
# are supplied.
target_index = df_dict['Efficiency (%)']
prior_knowledge = make_prior_knowledge(
    n_variables=15,
    exogenous_variables=list(range(14)),
    sink_variables=[target_index],
    # Rule out a directed path for the ('Efficiency (%)', 'Ci (mg/L)') pair.
    no_paths=[(target_index, df_dict['Ci (mg/L)'])],
)
# Fit DirectLiNGAM on the full frame under the constraints currently held in
# `prior_knowledge`.
lingam_options = {
    'random_state': 42,
    'measure': 'pwling',
    'prior_knowledge': prior_knowledge,
}
model = lingam.DirectLiNGAM(**lingam_options)
model.fit(df)
<lingam.direct_lingam.DirectLiNGAM object at 0x7f2d6a478980>
# Draw the estimated adjacency matrix, labelling nodes with the column names.
column_labels = df.columns.to_list()
dot = make_dot(
    model.adjacency_matrix_,
    labels=column_labels,
    prediction_target_label='Efficiency (%)',
)
dot.format = 'png'
# Writing the image to disk and displaying it is left disabled:
#dot.render('dag')
#Image("dag1.png")
# Second prior-knowledge matrix: only the columns named below are declared
# exogenous, and 'Efficiency (%)' is declared a sink.
exogenous_columns = [
    "Catalyst type",
    'Catalyst loading (g/L)',
    'Light intensity (W)',
    'time (min)',
    'solution pH',
    'Ci (mg/L)',
    'Anions',
]
prior_knowledge = make_prior_knowledge(
    # Size the matrix from the data rather than a literal so it always
    # matches the frame that is later passed to fit().
    n_variables=df.shape[1],
    exogenous_variables=[df_dict[name] for name in exogenous_columns],
    sink_variables=[df_dict['Efficiency (%)']],
    # Rule out a directed path for the ('Efficiency (%)', 'Ci (mg/L)') pair.
    no_paths=[(df_dict['Efficiency (%)'], df_dict['Ci (mg/L)'])],
)
# Re-fit DirectLiNGAM with the same settings, now under the constraints
# currently held in `prior_knowledge`.
lingam_options = {
    'random_state': 42,
    'measure': 'pwling',
    'prior_knowledge': prior_knowledge,
}
model = lingam.DirectLiNGAM(**lingam_options)
model.fit(df)
<lingam.direct_lingam.DirectLiNGAM object at 0x7f2d6a478350>
# Draw the graph of the re-fitted model, labelling nodes with the column names.
column_labels = df.columns.to_list()
dot = make_dot(
    model.adjacency_matrix_,
    labels=column_labels,
    prediction_target_label='Efficiency (%)',
)
dot.format = 'png'
# Writing the image to disk and displaying it is left disabled:
# dot.render('dag1')
#Image("dag1.png")
# Overlay a linear-regression prediction model on the causal graph: every
# non-target column is a predictor of 'Efficiency (%)'.
target_column = "Efficiency (%)"
X = df.drop([target_column], axis=1)

reg = LinearRegression(fit_intercept=True)
reg.fit(X, df[target_column])

# make_dot() uses these indices to look up node names in `labels`, which is
# the FULL column list.  Map each predictor back to its position in `df` so
# the edges stay attached to the right nodes even if the target column is
# not the last one.
features = [df.columns.get_loc(column) for column in X.columns]
print("features: ", features)

dot = make_dot(
    model.adjacency_matrix_,
    labels=df.columns.to_list(),
    prediction_feature_indices=features,
    prediction_target_label=target_column,
    prediction_coefs=reg.coef_,
)
dot.format = 'png'
# dot.render('dag_lr')
dot
features: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
<graphviz.graphs.Digraph object at 0x7f2d6a47ac60>
# Overlay a random-forest prediction model on the causal graph: every
# non-target column is a predictor of 'Efficiency (%)'.
target_column = "Efficiency (%)"
X = df.drop([target_column], axis=1)

# Seeded so the drawn importances are reproducible from run to run (same
# seed as the DirectLiNGAM model in this script).
reg = RandomForestRegressor(random_state=42)
reg.fit(X, df[target_column])

# make_dot() uses these indices to look up node names in `labels`, which is
# the FULL column list.  Map each predictor back to its position in `df` so
# the edges stay attached to the right nodes even if the target column is
# not the last one.
features = [df.columns.get_loc(column) for column in X.columns]
print("features: ", features)

# NOTE(review): the forest's feature importances are passed through
# `prediction_coefs`; make_dot() also offers `prediction_feature_importance`
# -- confirm which rendering is wanted.
dot = make_dot(
    model.adjacency_matrix_,
    labels=df.columns.to_list(),
    prediction_feature_indices=features,
    prediction_target_label=target_column,
    prediction_coefs=reg.feature_importances_,
)
dot.format = 'png'
# dot.render('dag_rf')
dot
features: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]
<graphviz.graphs.Digraph object at 0x7f2d6a479520>
Total running time of the script: (0 minutes 1.118 seconds)