"""
===========================
LiNGAM
===========================

https://lingam.readthedocs.io

https://www.kaggle.com/code/sasakitetsuya/water-quality-causal-inference-by-lingam

https://github.com/KJMAN678/casual_reasoning_lignum/blob/master/LIGNAM.ipynb

"""
from IPython.display import Image
import lingam
from lingam.utils import make_dot, make_prior_knowledge

from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

from utils import prepare_data

# %%

# prepare_data() returns three values; only the first is kept, as `df`.
df, _, _ = prepare_data()

# %%

# Map every column label to its positional index so that the prior knowledge
# can be declared by column name instead of by bare integer.
df_list = df.columns.to_list()
df_dict = {column: i for i, column in enumerate(df_list)}

# %%
# Prior knowledge for the first run: every variable except the target is
# declared exogenous and the target is declared a sink.
#
# The variable count and the exogenous indices are derived from the frame
# rather than hard-coded, so they cannot drift out of sync with the data.
# NOTE(review): for a 15-column frame whose last column is 'Efficiency (%)'
# this yields exactly the indices 0..13 — confirm that layout is the intent.
target_idx = df_dict['Efficiency (%)']
n_variables = len(df_list)
prior_knowledge = make_prior_knowledge(
    n_variables=n_variables,
    exogenous_variables=[i for i in range(n_variables) if i != target_idx],
    sink_variables=[target_idx],
    # Forbid a directed path from the target to the 'Ci (mg/L)' column.
    no_paths=[(target_idx, df_dict['Ci (mg/L)'])],
)

# %%

# Fit DirectLiNGAM on the frame, constrained by whatever matrix is currently
# bound to `prior_knowledge`. random_state is pinned to 42 and the
# independence measure is 'pwling'.
model = lingam.DirectLiNGAM(
    random_state=42,
    measure='pwling',
    prior_knowledge=prior_knowledge,
)
model.fit(df)

# %%

# Turn the fitted adjacency matrix into a graph, labelling nodes with the
# column names and marking 'Efficiency (%)' as the prediction target.
column_labels = df.columns.to_list()
dot = make_dot(
    model.adjacency_matrix_,
    labels=column_labels,
    prediction_target_label='Efficiency (%)',
)

# Output format used if the graph is rendered to disk.
dot.format = 'png'
# Rendering / inline display are left disabled:
# dot.render('dag')
# Image("dag1.png")

# %%
# Prior knowledge for the second run: only the seven named columns below are
# declared exogenous, and the target remains a sink.
#
# n_variables is taken from the frame instead of being hard-coded; the
# prior-knowledge matrix must be (n_features, n_features) for fit() to accept
# it, so deriving it keeps the two in step.
prior_knowledge = make_prior_knowledge(
    n_variables=len(df_list),
    exogenous_variables=[
        df_dict["Catalyst type"],
        df_dict['Catalyst loading (g/L)'],
        df_dict['Light intensity (W)'],
        df_dict['time (min)'],
        df_dict['solution pH'],
        df_dict['Ci (mg/L)'],
        df_dict['Anions'],
    ],
    sink_variables=[df_dict['Efficiency (%)']],
    # Forbid a directed path from the target to the 'Ci (mg/L)' column.
    no_paths=[(df_dict['Efficiency (%)'], df_dict['Ci (mg/L)'])],
)

# %%

# Re-fit DirectLiNGAM with the same settings (seed 42, 'pwling' measure),
# this time under the prior knowledge currently bound to `prior_knowledge`.
direct_lingam_kwargs = {
    'random_state': 42,
    'measure': 'pwling',
    'prior_knowledge': prior_knowledge,
}
model = lingam.DirectLiNGAM(**direct_lingam_kwargs)
model.fit(df)

# %%

# Build the graph for the re-fitted model: nodes are labelled with the column
# names and 'Efficiency (%)' is marked as the prediction target.
column_labels = df.columns.to_list()
dot = make_dot(
    model.adjacency_matrix_,
    labels=column_labels,
    prediction_target_label='Efficiency (%)',
)

# Output format used if the graph is rendered to disk.
dot.format = 'png'
# Rendering / inline display are left disabled:
# dot.render('dag1')
# Image("dag1.png")

# %%

# Fit a linear regression of the target on all remaining columns and overlay
# its coefficients on the causal graph.
target = "Efficiency (%)"
predictors = df.drop([target], axis=1)  # computed once, reused below

reg = LinearRegression(fit_intercept=True)
reg.fit(predictors, df[target])

# Positions, within df.columns, of the columns the regressor was trained on.
# Taking them from the real column order (rather than range(n - 1)) keeps
# each coefficient attached to the right node even if the target is not the
# last column.
features = [i for i, column in enumerate(df.columns) if column != target]
print("features: ", features)

dot = make_dot(
    model.adjacency_matrix_,
    labels=df.columns.to_list(),
    prediction_feature_indices=features,
    prediction_target_label='Efficiency (%)',
    prediction_coefs=reg.coef_,
)

dot.format = 'png'
# dot.render('dag_lr')
dot  # bare expression: shows the graph when run as an interactive cell

# %%

# Fit a random forest of the target on all remaining columns and overlay its
# feature importances on the causal graph.
target = "Efficiency (%)"
predictors = df.drop([target], axis=1)  # computed once, reused below

# Seeded so the importances are repeatable from run to run; 42 matches the
# random_state passed to DirectLiNGAM elsewhere in this script.
reg = RandomForestRegressor(random_state=42)
reg.fit(predictors, df[target])

# Positions, within df.columns, of the columns the regressor was trained on.
# Taking them from the real column order (rather than range(n - 1)) keeps
# each importance attached to the right node even if the target is not the
# last column.
features = [i for i, column in enumerate(df.columns) if column != target]
print("features: ", features)

dot = make_dot(
    model.adjacency_matrix_,
    labels=df.columns.to_list(),
    prediction_feature_indices=features,
    prediction_target_label='Efficiency (%)',
    prediction_coefs=reg.feature_importances_,
)

dot.format = 'png'
# dot.render('dag_rf')
dot  # bare expression: shows the graph when run as an interactive cell

