from PyQt5.QtGui import *
from PyQt5 import QtGui, QtCore, QtWidgets, uic
import neuroHarmonize as nh
import numpy as np
import pandas as pd
from NiBAx.core import iStagingLogger
class Harmonize(QtCore.QObject):
    """Adapter between the data model and the neuroHarmonize worker functions.

    The instance keeps a reference to ``datamodel`` and stores the supplied
    harmonization ``model`` on it as ``datamodel.harmonization_model``.
    """

    # Minimum number of rows of a new site that must be flagged in
    # `UseForComBatGAMHarmonization` before the site is used to extend the model.
    MIN_REFERENCE_POINTS = 5

    def __init__(self, datamodel, model):
        """Store ``datamodel`` and attach ``model`` to it.

        Args:
            datamodel: Object exposing ``data`` (a pandas DataFrame),
                ``GetMUSEDictDataFrame()`` and ``GetDerivedMUSEMap()``.
            model: Mapping read with the keys ``'ROIs'``, ``'SITE_labels'``,
                ``'gamma_star'``, ``'delta_star'``, ``'B_hat'`` and,
                optionally, ``'Covariates'``.
        """
        super(Harmonize, self).__init__()
        self.datamodel = datamodel
        self.datamodel.harmonization_model = model

    @staticmethod
    def _parameter_table(model):
        """Return ``gamma_star``/``delta_star`` of ``model`` as one DataFrame.

        Rows are the model's ``SITE_labels`` (sorted); columns are
        ``gamma_<ROI>`` followed by ``delta_<ROI>``.
        """
        rois = list(model['ROIs'])
        sites = list(model['SITE_labels'])
        gamma = pd.DataFrame(model['gamma_star'],
                             columns=['gamma_' + x for x in rois],
                             index=sites)
        delta = pd.DataFrame(model['delta_star'],
                             columns=['delta_' + x for x in rois],
                             index=sites)
        return pd.concat([gamma, delta], axis=1).sort_index()

    @staticmethod
    def _nan_parameter_rows(sites, rois):
        """Return an all-NaN gamma/delta frame with one row per entry of ``sites``."""
        columns = (['gamma_' + x for x in rois]
                   + ['delta_' + x for x in rois])
        return pd.DataFrame(np.nan, index=list(sites), columns=columns)

    def DoHarmonization(self):
        """Harmonize the ROI columns of ``datamodel.data``.

        When the data has a ``UseForComBatGAMHarmonization`` column, every
        site that is present in the data but absent from the model and has at
        least ``MIN_REFERENCE_POINTS`` non-null entries in that column is used
        to learn an extended model (stored as ``self.model``), which is then
        applied. Otherwise the stored model is applied unchanged.

        Returns:
            tuple: ``(muse, parameters)`` where ``muse`` is a DataFrame holding
            the covariates, the harmonized ``H_<ROI>`` columns and the
            ``RES_ICV_Sex_<ROI>``, ``RES_<ROI>`` and ``RAW_RES_<ROI>`` residual
            columns, and ``parameters`` is the ``gamma_``/``delta_`` table
            indexed by site.
        """
        print('Running harmonization.')
        data = self.datamodel.data
        model = self.datamodel.harmonization_model
        rois = list(model['ROIs'])

        # NOTE(review): `logger` is not bound anywhere in the visible part of
        # this module (only `iStagingLogger` is imported) - confirm that it is
        # defined at module level.
        if 'Covariates' in model:
            covariates = model['Covariates']
            logger.info('Covariates hard-coded in model.')
        else:
            covariates = ['SITE', 'Age', 'Sex', 'DLICV_baseline']
            logger.info('Covariates default to `SITE`, `Age`, `Sex`, and `DLICV_baseline`.')

        # Sites present in the data that the model has not seen.  A plain set
        # difference is required: a symmetric difference would also report
        # model sites that are merely absent from this data set as "new".
        new_sites = (set(data['SITE'].value_counts().index.tolist())
                     - set(model['SITE_labels']))

        covars = data[['SITE', 'Age', 'Sex', 'DLICV_baseline']].reset_index(drop=True).copy()
        # Plain column assignment replaces the column, so the numeric dtype of
        # the mapped values is kept (`.loc[:, col] = ...` writes in place).
        covars['Sex'] = covars['Sex'].map({'M': 1, 'F': 0})
        covars.loc[covars.Age > 100, 'Age'] = 100

        if 'UseForComBatGAMHarmonization' in data.columns:
            # Loop-invariant mask of the rows flagged as reference data.
            is_reference = np.array(data['UseForComBatGAMHarmonization'].notnull(), dtype=bool)
            sites_to_harmonize = []
            for site in sorted(new_sites, key=str):
                in_site = np.array(data['SITE'] == site, dtype=bool)
                n_reference = np.count_nonzero(np.logical_and(in_site, is_reference))
                if n_reference < self.MIN_REFERENCE_POINTS:
                    print('New site `{}` has less than {} reference data points. '
                          'Skipping harmonization.'.format(site, self.MIN_REFERENCE_POINTS))
                    continue
                print('Harmonizing {}'.format(site))
                sites_to_harmonize.append(site)

            if not sites_to_harmonize:
                print('No new sites that meet out-of-sample harmonization requirement. Proceeding with harmonization.')
                applied_model = model
            else:
                # Rows of the selected sites with complete covariates.
                oos = data[data['SITE'].isin(sites_to_harmonize)].dropna(subset=covariates)
                oos_data = oos[rois].values
                oos_covars = oos[covariates].copy()
                oos_covars['Sex'] = oos_covars['Sex'].map({'M': 1, 'F': 0})
                self.model, _ = nh.harmonizationLearn(
                    oos_data, oos_covars,
                    smooth_terms=['Age'],
                    smooth_term_bounds=(np.floor(np.min(data.Age)), np.ceil(np.max(data.Age))),
                    orig_model=model, seed=20220601)
                applied_model = self.model

            bayes_data, stand_mean = nh.harmonizationApply(data[rois].values,
                                                           covars,
                                                           applied_model, True)
            parameters = self._parameter_table(applied_model)
        else:
            print('Skipping out-of-sample harmonization because `UseForComBatGAMHarmonization` does not exist.')
            # Model parameters plus one all-NaN row per new site.
            parameters = self._parameter_table(model)
            if new_sites:
                parameters = pd.concat(
                    [parameters, self._nan_parameter_rows(new_sites, rois)],
                    axis=0).sort_index()
            bayes_data, stand_mean = nh.harmonizationApply(data[rois].values,
                                                           covars,
                                                           model, True)

        Raw_ROIs_Residuals = data[rois].values - stand_mean

        harmonized = pd.DataFrame(bayes_data, columns=['H_' + s for s in rois])
        frames = [covars, harmonized]
        if 'isTrainMUSEHarmonization' in data.columns:
            frames.insert(0, data['isTrainMUSEHarmonization'].reset_index(drop=True).copy())
        muse = pd.concat(frames, axis=1)

        # harmonize derived volumes
        if 'MUSE_Volume_301' not in rois:
            logger.info('No derived volumes in model.')
            logger.info('Calculating using derived mapping dictionary.')
            MUSEDictDataFrame = self.datamodel.GetMUSEDictDataFrame()
            muse_mappings = self.datamodel.GetDerivedMUSEMap()
            for ROI in MUSEDictDataFrame[MUSEDictDataFrame['ROI_LEVEL'] == 'DERIVED']['ROI_INDEX']:
                # Builtin `float` replaces the removed `np.float` alias.
                single_ROIs = muse_mappings.loc[ROI].replace('NaN', np.nan).dropna().astype(float)
                single_ROIs = ['H_MUSE_Volume_%0d' % x for x in single_ROIs]
                muse['H_MUSE_Volume_%d' % ROI] = muse[single_ROIs].sum(axis=1, skipna=False)
            # NOTE(review): nesting reconstructed - kept inside this branch to
            # mirror the `ROI_list.remove('MUSE_Volume_702')` in
            # AddHarmonizedMUSE; confirm against the original layout.
            muse.drop(columns=['H_MUSE_Volume_702'], inplace=True)

        # The two rows of B_hat that follow the per-site rows are applied to
        # the `Sex` and `DLICV_baseline` columns.
        start_index = len(model['SITE_labels'])
        sex_icv_effect = np.dot(muse[['Sex', 'DLICV_baseline']].copy(),
                                model['B_hat'][start_index:(start_index + 2), :])

        ROIs_ICV_Sex_Residuals = ['RES_ICV_Sex_' + x for x in rois]
        muse.loc[:, ROIs_ICV_Sex_Residuals] = muse[['H_' + x for x in rois]].values - sex_icv_effect

        # Restore the string coding of `Sex`; column assignment avoids writing
        # strings in place into a numeric column.
        muse['Sex'] = muse['Sex'].map({1: 'M', 0: 'F'})

        ROIs_Residuals = ['RES_' + x for x in rois]
        RAW_Residuals = ['RAW_RES_' + x for x in rois]
        muse.loc[:, ROIs_Residuals] = bayes_data - stand_mean
        muse.loc[:, RAW_Residuals] = Raw_ROIs_Residuals
        print('Harmonization done.')
        return muse, parameters

    def AddHarmonizedMUSE(self, muse):
        """Copy harmonized and residual columns from ``muse`` into ``datamodel.data``.

        ``muse`` is re-indexed in place to ``datamodel.data.index``.  The
        columns are written only when ``H_MUSE_Volume_47`` is not already a
        column of ``datamodel.data``.

        Args:
            muse: DataFrame as returned by ``DoHarmonization``.
        """
        print('Saving modified data to pickle file...')
        muse.set_index(self.datamodel.data.index, inplace=True)

        model_rois = list(self.datamodel.harmonization_model['ROIs'])
        ROI_list = list(model_rois)
        if 'MUSE_Volume_301' not in ROI_list:
            logger.info('No derived volumes in model')
            MUSEDictDataFrame = self.datamodel.GetMUSEDictDataFrame()
            Derived_numbers = list(MUSEDictDataFrame[MUSEDictDataFrame['ROI_LEVEL'] == 'DERIVED']['ROI_INDEX'])
            ROI_list = ROI_list + ['MUSE_Volume_' + str(x) for x in Derived_numbers]
            ROI_list.remove('MUSE_Volume_702')
        else:
            logger.info('Model includes derived volumes')

        H_ROIs = ['H_' + str(x) for x in ROI_list]
        ROIs_ICV_Sex_Residuals = ['RES_ICV_Sex_' + x for x in model_rois]
        ROIs_Residuals = ['RES_' + x for x in model_rois]
        RAW_Residuals = ['RAW_RES_' + x for x in model_rois]

        # NOTE(review): nesting reconstructed - all four assignments are
        # assumed to sit under this guard; confirm against the original layout.
        if 'H_MUSE_Volume_47' not in self.datamodel.data.keys():
            self.datamodel.data.loc[:, H_ROIs] = muse[H_ROIs]
            self.datamodel.data.loc[:, ROIs_ICV_Sex_Residuals] = muse[ROIs_ICV_Sex_Residuals]
            self.datamodel.data.loc[:, ROIs_Residuals] = muse[ROIs_Residuals]
            self.datamodel.data.loc[:, RAW_Residuals] = muse[RAW_Residuals]