# Copyright 2023 The HIP team, University Hospital of Lausanne (CHUV), Switzerland & Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module that defines the class dedicated to the main window of the MIP Dataset Mapper UI application."""
# External imports
import ast
import os
import json
from pathlib import Path
import pandas as pd
from PySide2.QtGui import QIcon
from PySide2.QtCore import Qt, QRect, QMetaObject, QCoreApplication, QSize
from PySide2.QtWidgets import (
QAction,
QWidget,
QGridLayout,
QSplitter,
QGroupBox,
QTableView,
QFormLayout,
QPushButton,
QLabel,
QStatusBar,
QFileDialog,
QMessageBox,
QAbstractItemView,
QComboBox,
QToolBar,
QSizePolicy,
QLineEdit,
QHeaderView,
QHBoxLayout,
QVBoxLayout,
QInputDialog,
)
import pkg_resources
# Internal imports
from mip_dmp.utils.io import load_mapping_json
from mip_dmp.process.mapping import (
map_dataset,
MAPPING_TABLE_COLUMNS,
)
from mip_dmp.process.matching import (
match_columns_to_cdes, match_column_to_cdes
)
from mip_dmp.qt5.model.table_model import (
# NoEditorDelegate,
PandasTableModel,
)
from mip_dmp.qt5.components.embedding_visualization_widget import (
WordEmbeddingVisualizationWidget,
)
from mip_dmp.qt5.components.matching_visualization_widget import (
MatchingVisualizationWidget,
)
# Constants
# Name of the application: used in this module as the main window object
# name, as the window title, and as the translation context of the UI texts
WINDOW_NAME = "MIP Dataset Mapper"
# Value passed as `nb_kept_matches` to `match_columns_to_cdes()` when the
# mapping is initialized by matching
# NOTE(review): presumably the number of candidate CDE matches kept per
# dataset column — confirm against `mip_dmp.process.matching`
NB_KEPT_MATCHES = 819 # for all FERES variables
[docs]class MIPDatasetMapperWindow(object):
"""Class for the main window of the MIP Dataset Mapper UI application.

An instance builds its widgets, layouts and actions on top of the
``QMainWindow`` given to the constructor, and also holds the data loaded
or produced through the UI (input dataset, target CDEs, columns / CDEs
mapping, output file name).
"""
# Every attribute an instance may hold has to be listed here, because
# `__slots__` replaces the per-instance `__dict__`. "__weakref__" is listed
# so that weak references to an instance remain possible.
__slots__ = [
"__weakref__",
"centralWidgetGridLayout",
"centralWidgetSplitter",
"centralwidget",
"columnsCDEsMappingGroupBox",
"columnsCDEsMappingGroupBoxLayout",
"columnsCDEsMappingSplitter",
"inputDatasetFormLayout",
"inputDatasetFormLayoutWidget",
"inputDatasetGroupBox",
"inputDatasetGroupBoxLayout",
"inputDatasetLoadButton",
"inputDatasetPathLabel",
"inputDatasetTableView",
"leftCentralWidgetSplitter",
"mapButton",
"mappingFilePathLabel",
"mappingFormLayout",
"mappingFormLayoutWidget",
"mappingLoadButton",
"mappingSaveButton",
"mappingCheckButton",
"mappingTableView",
"outputDirectoryLabel",
"outputDirectorySelectButton",
"outputFilenameLabel",
"outputFilenameSelectButton",
"outputFormLayout",
"outputFormLayoutWidget",
"outputGroupBox",
"outputGroupBoxLayout",
"rightCentralWidgetSplitter",
"targetCDEsFormLayout",
"targetCDEsFormLayoutWidget",
"targetCDEsGroupBox",
"targetCDEsGroupBoxLayout",
"targetCDEsLoadButton",
"targetCDEsPathLabel",
"targetCDEsTableView",
"inputDatasetPath",
"inputDataset",
"inputDatasetColumns",
"inputDatasetPandasModel",
"targetCDEsPath",
"targetCDEs",
"targetCDEsPandasModel",
"mappingFilePath",
"columnsCDEsMappingData",
"columnsCDEsMappingPandasModel",
"mappingTableRowUpdateGroupBox",
"mappingTableRowUpdateGroupBoxLayout",
"mappingRowIndex",
"mappingTableViewWidget",
"mappingTableViewLayout",
"mappingTableViewAddDeleteRowWidget",
"mappingTableViewAddDeleteRowLayout",
"mappingTableViewAddRowButton",
"mappingTableViewDeleteRowButton",
"datasetColumn",
"cdeCode",
"cdeType",
"transformType",
"transform",
"matchedCdeCodes",
"updateMappingRowButton",
"outputDirectoryPath",
"outputFilename",
"statusbar",
"toolBar",
"mappingInitLabel",
"initMatchingMethod",
"mappingInitButton",
"embeddingVizButton",
"embeddingFigure",
"embeddingWidget",
"embeddingWidgetLayout",
"embeddingCanvas",
"inputDatasetColumnEmbeddings",
"targetCDEsEmbeddings",
"matchingVizButton",
"matchingWidget",
]
def __init__(self, mainWindow):
    """Build the user interface of the application on the given main window.

    Parameters
    ----------
    mainWindow : QMainWindow
        The main window of the application.
    """
    # Window size / style sheet / title, then the UI components, then the
    # tool bar: all three steps need the main window
    for buildStep in (self.adjustWindow, self.createComponents, self.createToolBar):
        buildStep(mainWindow)
    # Add click listener functions to the button elements, then place the
    # widgets into their layouts
    self.connectButtons()
    self.adjustWidgetsAndLayouts()
    # Hand the central widget, the status bar and the tool bar to the window
    mainWindow.setCentralWidget(self.centralwidget)
    mainWindow.setStatusBar(self.statusbar)
    mainWindow.addToolBar(self.toolBar)
    # Search recursively for all child objects of the main window, and
    # connect matching signals from them to slots of the main window
    QMetaObject.connectSlotsByName(mainWindow)
    # Initial state of the UI: the mapping initialization items, the
    # mapping / map buttons, and the mapping table with its row editor
    # are all disabled
    for disableStep in (
        self.disableMappingInitItems,
        self.disableMappingMapButtons,
        self.disableMappingComponents,
    ):
        disableStep()
def adjustWindow(self, mainWindow):
    """Set the object name, size, Qt Style Sheet, and title of the window.

    Parameters
    ----------
    mainWindow : QMainWindow
        The main window of the application.
    """
    # Name the window only when it has no object name yet
    if not mainWindow.objectName():
        mainWindow.setObjectName(WINDOW_NAME)
    mainWindow.resize(1280, 720)
    # Read the Qt Style Sheet shipped with the package and apply it
    qssPath = pkg_resources.resource_filename(
        "mip_dmp", os.path.join("qt5", "assets", "stylesheet.qss")
    )
    with open(qssPath, "r") as qssFile:
        qssContent = qssFile.read()
    mainWindow.setStyleSheet(qssContent)
    # The window icon is currently not set:
    # mainWindow.setWindowIcon(QIcon(":/images/mip_logo.png"))
    # Set the window title
    windowTitle = QCoreApplication.translate(WINDOW_NAME, WINDOW_NAME, None)
    mainWindow.setWindowTitle(windowTitle)
def createComponents(self, mainWindow):
    """Instantiate the central widget, the splitters, the group boxes and the status bar.

    Parameters
    ----------
    mainWindow : QMainWindow
        The main window of the application.
    """
    # Central widget and its grid layout
    self.centralwidget = QWidget(mainWindow)
    self.centralWidgetGridLayout = QGridLayout(self.centralwidget)
    # Main splitters: one horizontal, and two vertical ones (left / right)
    self.centralWidgetSplitter = QSplitter(Qt.Horizontal)
    self.leftCentralWidgetSplitter, self.rightCentralWidgetSplitter = (
        QSplitter(Qt.Vertical),
        QSplitter(Qt.Vertical),
    )
    # Components of the input dataset group box (top left), of the
    # target CDEs group box (bottom left), and of the columns / CDEs
    # mapping group box (top right), created in this order
    for createGroupBoxComponents in (
        self.createInputDatasetComponents,
        self.createTargetCDEsComponents,
        self.createMappingComponents,
    ):
        createGroupBoxComponents(mainWindow)
    # Status bar
    self.statusbar = QStatusBar(mainWindow)
def createTargetCDEsComponents(self, mainWindow):
    """Create the components of the target CDEs group box.

    Parameters
    ----------
    mainWindow : QMainWindow
        The main window of the application, used as parent of the
        "Load CDE file" action.
    """
    self.targetCDEsGroupBox = QGroupBox(self.centralwidget)
    # Set the layout of the group box
    self.targetCDEsGroupBoxLayout = QGridLayout()
    # Set the table view (read-only: edit triggers are disabled)
    self.targetCDEsTableView = QTableView(self.targetCDEsGroupBox)
    self.targetCDEsTableView.setGeometry(QRect(10, 70, 341, 101))
    self.targetCDEsTableView.setEditTriggers(QAbstractItemView.NoEditTriggers)
    # Set the form layout with button to load the CDEs file
    self.targetCDEsFormLayoutWidget = QWidget(self.targetCDEsGroupBox)
    self.targetCDEsFormLayoutWidget.setGeometry(QRect(10, 30, 341, 31))
    self.targetCDEsFormLayout = QFormLayout(self.targetCDEsFormLayoutWidget)
    self.targetCDEsFormLayout.setContentsMargins(0, 0, 0, 0)
    self.targetCDEsLoadButton = QAction(
        QIcon(
            pkg_resources.resource_filename("mip_dmp", "qt5/assets/load_cdes.png")
        ),
        "Load CDE file",
        mainWindow,
    )
    # The extension shown to the user is ".xlsx", in agreement with the
    # "Excel files (*.xlsx)" filter of the file dialog opened to load the file
    self.targetCDEsLoadButton.setToolTip(
        "Load CDEs metadata schema file (.xlsx format)"
    )
    self.targetCDEsPathLabel = QLabel(self.targetCDEsFormLayoutWidget)
    # Set text of the components
    self.targetCDEsGroupBox.setTitle(
        QCoreApplication.translate(
            f"{WINDOW_NAME}", "Target CDEs Metadata Schema", None
        )
    )
    self.targetCDEsPathLabel.setText(
        QCoreApplication.translate(
            f"{WINDOW_NAME}",
            "<Please load a CDEs metadata schema file in .xlsx format>",
            None,
        )
    )
def createMappingComponents(self, mainWindow):
    """Instantiate the widgets and actions of the columns / CDEs mapping group box.

    Parameters
    ----------
    mainWindow : QMainWindow
        The main window of the application, used as parent of the
        load / save / check mapping actions.
    """

    def translate(text):
        """Translate `text` in the context of the application window."""
        return QCoreApplication.translate(WINDOW_NAME, text, None)

    def packagedIcon(resourceName):
        """Create an icon from a resource file of the `mip_dmp` package."""
        return QIcon(pkg_resources.resource_filename("mip_dmp", resourceName))

    groupBox = QGroupBox(self.centralwidget)
    self.columnsCDEsMappingGroupBox = groupBox
    # Layout of the group box
    self.columnsCDEsMappingGroupBoxLayout = QGridLayout()
    # Form used to load the mapping file
    formWidget = QWidget(groupBox)
    formWidget.setGeometry(QRect(10, 30, 371, 31))
    self.mappingFormLayoutWidget = formWidget
    formLayout = QFormLayout(formWidget)
    formLayout.setContentsMargins(0, 0, 0, 0)
    self.mappingFormLayout = formLayout
    loadAction = QAction(
        packagedIcon("qt5/assets/load_mapping.png"),
        "Load mapping file",
        mainWindow,
    )
    loadAction.setToolTip("Load Columns / CDEs mapping file (.json format)")
    self.mappingLoadButton = loadAction
    self.mappingFilePathLabel = QLabel(formWidget)
    # Splitter for the mapping table and the row editor form
    self.columnsCDEsMappingSplitter = QSplitter(Qt.Vertical)
    self.mappingTableViewWidget = QWidget(groupBox)
    self.mappingTableViewLayout = QVBoxLayout()
    # Mapping table, with a visible header whose sections are stretched
    tableView = QTableView(groupBox)
    tableView.setGeometry(QRect(10, 70, 371, 231))
    tableHeader = tableView.horizontalHeader()
    tableHeader.setSectionResizeMode(QHeaderView.Stretch)
    tableHeader.setVisible(True)
    self.mappingTableView = tableView
    # Buttons to add a row to / delete a row from the mapping table
    self.mappingTableViewAddDeleteRowWidget = QWidget(groupBox)
    self.mappingTableViewAddDeleteRowLayout = QHBoxLayout()
    addRowButton = QPushButton(groupBox)
    addRowButton.setToolTip("Add a new row to the mapping table")
    addRowButton.setText(translate("Add"))
    self.mappingTableViewAddRowButton = addRowButton
    deleteRowButton = QPushButton(groupBox)
    deleteRowButton.setToolTip("Delete the selected row from the mapping table")
    deleteRowButton.setText(translate("Delete"))
    self.mappingTableViewDeleteRowButton = deleteRowButton
    # Group box with the form used to edit a row of the mapping table
    self.mappingTableRowUpdateGroupBox = QGroupBox()
    self.createMappingTableRowViewComponents()
    self.mappingTableRowUpdateGroupBox.setTitle(translate("Mapping Row Editor"))
    # Action to save the mapping
    saveAction = QAction(
        packagedIcon("qt5/assets/save_mapping.png"),
        "Save mapping file",
        mainWindow,
    )
    saveAction.setToolTip("Save Columns / CDEs mapping file (.json format)")
    self.mappingSaveButton = saveAction
    # Action to check the mapping
    checkAction = QAction(
        packagedIcon("qt5/assets/check_mapping.png"),
        "Check Columns / CDEs mapping",
        mainWindow,
    )
    checkAction.setToolTip("Check Columns / CDEs mapping")
    self.mappingCheckButton = checkAction
    # Texts of the group box title and of the mapping file path label
    groupBox.setTitle(translate("Columns / CDEs Mapping"))
    self.mappingFilePathLabel.setText(
        translate(
            "<Please save/ load a Columns / CDEs mapping file in .json format...>"
        )
    )
def createMappingTableRowViewComponents(self):
    """Create the components of the mapping table row editor group box.

    The editor is a form with one line per field of a mapping table row
    (row index, dataset column, CDE code, CDE type, transform type and
    transform), followed by the "Update row" button.
    """
    # Create a form layout for the mapping group box
    self.mappingTableRowUpdateGroupBoxLayout = QFormLayout()
    # Setup the widgets
    self.mappingRowIndex = QLabel(self.columnsCDEsMappingGroupBox)
    self.datasetColumn = QLabel(self.columnsCDEsMappingGroupBox)
    self.cdeCode = QComboBox(self.columnsCDEsMappingGroupBox)
    icon = pkg_resources.resource_filename(
        "mip_dmp", os.path.join("qt5", "assets", "down_arrow.png")
    )
    # The path ends up inside a style sheet `url(...)`, where a backslash
    # is an escape character: use forward slashes so that the native
    # Windows path is not mangled (no-op where the separator is already "/")
    icon = icon.replace(os.sep, "/")
    self.cdeCode.setStyleSheet(
        f"QComboBox::down-arrow {{ image: url({icon}); height: 16px; width: 16px; }}"
    )
    self.cdeType = QLabel(self.columnsCDEsMappingGroupBox)
    self.transformType = QLabel(self.columnsCDEsMappingGroupBox)
    self.transform = QLineEdit(self.columnsCDEsMappingGroupBox)
    self.updateMappingRowButton = QPushButton(
        "Update row", self.columnsCDEsMappingGroupBox
    )
    # Add one captioned line per field to the form layout
    captionedFields = (
        ("Mapping Table Row Index", self.mappingRowIndex),
        ("Dataset Column", self.datasetColumn),
        ("CDE Code", self.cdeCode),
        ("CDE Type", self.cdeType),
        ("Transform Type", self.transformType),
        ("Transform", self.transform),
    )
    for caption, field in captionedFields:
        self.mappingTableRowUpdateGroupBoxLayout.addRow(QLabel(caption), field)
    # The update button goes on the last line, next to an empty label
    self.mappingTableRowUpdateGroupBoxLayout.addRow(
        QLabel(), self.updateMappingRowButton
    )
def embeddingViz(self):
    """Show the embedding visualization window for the current matching method.

    A warning is shown instead when the selected matching method is "fuzzy".
    """
    self.embeddingWidget = WordEmbeddingVisualizationWidget()
    matchingMethod = self.initMatchingMethod.currentText()
    print(
        "Launch visualization widget with matching method: "
        f"{matchingMethod}"
    )
    if matchingMethod == "fuzzy":
        QMessageBox().warning(
            None,
            "Warning",
            "Embedding visualization is not available for fuzzy matching.",
        )
        return
    # Feed the widget with the dataset columns, the embeddings and the
    # matches, then render and show the figure
    vizWidget = self.embeddingWidget
    vizWidget.set_wordcombobox_items(self.inputDatasetColumns)
    vizWidget.set_embeddings(
        self.inputDatasetColumnEmbeddings,
        self.inputDatasetColumns,
        self.targetCDEsEmbeddings,
        list(self.targetCDEs["code"].unique()),
        self.matchedCdeCodes,
        matchingMethod,
    )
    vizWidget.generate_embedding_figure()
    vizWidget.show()
def matchingViz(self):
    """Show the matching visualization window for the current matching method."""
    matchingMethod = self.initMatchingMethod.currentText()
    cdeCodes = self.targetCDEs["code"].unique().tolist()
    vizWidget = MatchingVisualizationWidget(
        self.inputDatasetColumns,
        cdeCodes,
        self.matchedCdeCodes,
        matchingMethod,
        None,
    )
    self.matchingWidget = vizWidget
    vizWidget.set_wordcombobox_items(self.inputDatasetColumns)
    print(
        "Launch matching visualization widget "
        f"(matching method: {matchingMethod})"
    )
    # Render the heatmap figure and show the window
    vizWidget.generate_heatmap_figure()
    vizWidget.show()
def addMappingTableRow(self):
    """Add a row to the mapping table for a dataset column chosen by the user.

    A dialog proposes the list of dataset columns. The new row maps the
    chosen column to its first matched CDE code, with a default transform
    that depends on the CDE type ("scale" by 1.0 for "real" / "integer"
    CDEs, a template "map" otherwise). A warning is shown, and no row is
    added, when no column is selected or when no CDE can be proposed.
    """

    def warn(warnMsg):
        """Show the warning in a message box and in the status bar."""
        QMessageBox.warning(None, "Warning", warnMsg)
        self.updateStatusbar(warnMsg)

    # Show a dialog to select the dataset column from the list of
    # dataset columns (not editable, first item preselected)
    datasetColumn, ok = QInputDialog().getItem(
        None,
        "Select dataset column to add to the mapping table",
        "Dataset column:",
        self.inputDatasetColumns,
        0,
        False,
    )
    if not ok or datasetColumn is None or datasetColumn == "":
        warn("No dataset column selected!")
        return
    if self.matchedCdeCodes:
        # Reuse the matches computed when the mapping was initialized
        columnMatches = self.matchedCdeCodes[datasetColumn]["words"]
    else:
        # If the matchedCdeCodes dictionary is empty, then the user
        # has not yet initialized the mapping with a matching method, and
        # the matches are computed for this column only
        columnMatches = match_column_to_cdes(datasetColumn, self.targetCDEs)
    # Indexing an empty result would raise: report it to the user instead
    if len(columnMatches) == 0:
        warn(f"No CDE code could be matched to dataset column '{datasetColumn}'!")
        return
    # The first match is the proposed CDE code
    cdeCode = columnMatches[0]
    cdeTypes = self.targetCDEs[self.targetCDEs["code"] == cdeCode]["type"].unique()
    if len(cdeTypes) == 0:
        warn(f"CDE code '{cdeCode}' was not found in the target CDEs!")
        return
    cdeType = cdeTypes[0]
    if cdeType in ("real", "integer"):
        transformType, transform = "scale", "1.0"
    else:
        transformType, transform = "map", '{ "X": "Y", "Y": "X" }'
    newRow = {
        "dataset_column": datasetColumn,
        "cde_code": cdeCode,
        "cde_type": cdeType,
        "transform_type": transformType,
        "transform": transform,
    }
    # Use the loc method to append the new row at the end of the DataFrame
    self.columnsCDEsMappingData.loc[len(self.columnsCDEsMappingData)] = newRow
    # Update the table
    self.mappingTableView.model().layoutChanged.emit()
    successMsg = (
        f"New row for dataset column '{datasetColumn}' added to the mapping table!"
    )
    QMessageBox.information(None, "Success", successMsg)
    self.updateStatusbar(successMsg)
def deleteMappingTableRow(self):
    """Delete the selected row from the mapping table.

    A warning is shown, and nothing is deleted, when no row is selected.
    """
    selectedIndexes = self.mappingTableView.selectedIndexes()
    # Without this guard, taking the first index of an empty selection
    # would raise an IndexError
    if not selectedIndexes:
        warnMsg = "No mapping table row selected!"
        QMessageBox.warning(None, "Warning", warnMsg)
        self.updateStatusbar(warnMsg)
        return
    rowIndex = selectedIndexes[0].row()
    # Delete the row from the DataFrame: the row of the view is a
    # position, so it is translated to the corresponding index label
    self.columnsCDEsMappingData.drop(
        index=self.columnsCDEsMappingData.index[rowIndex], inplace=True
    )
    self.columnsCDEsMappingData.reset_index(drop=True, inplace=True)
    # Update the table
    self.mappingTableView.model().layoutChanged.emit()
    successMsg = f"Row {rowIndex} deleted from the mapping table!"
    QMessageBox.information(None, "Success", successMsg)
    self.updateStatusbar(successMsg)
def updateMappingTableRow(self):
    """Update a row of the mapping table with the data of the row editor form.

    The row to update is the one whose index is displayed in the form.
    When the form does not display the index of an existing row (e.g. no
    row has been selected yet), the table is left untouched and a message
    is shown in the status bar.
    """
    nbRows = len(self.columnsCDEsMappingData)
    # The index label is empty until a row is loaded in the form, in which
    # case the conversion to int fails
    try:
        rowIndex = int(self.mappingRowIndex.text())
    except (TypeError, ValueError):
        rowIndex = None
    if rowIndex is None or not 0 <= rowIndex < nbRows:
        self.updateStatusbar(
            "Mapping row not updated: no valid mapping table row is selected!"
        )
        return
    # Update the data in the table with the content of the form, in the
    # order of the mapping table columns
    self.columnsCDEsMappingData.iloc[rowIndex, :] = [
        self.datasetColumn.text(),
        self.cdeCode.currentText(),
        self.cdeType.text(),
        self.transformType.text(),
        self.transform.text(),
    ]
    # Update the table
    self.mappingTableView.model().layoutChanged.emit()
def loadCDEsFile(self):
    """Load the CDEs metadata schema from an Excel file chosen by the user.

    On success, the CDEs are displayed in the target CDEs table view. An
    empty mapping is then initialized and the mapping items are enabled if
    an existing input dataset file has already been selected; otherwise
    they are disabled. If the selected file does not exist, a warning is
    shown and the mapping items are disabled.
    """
    self.targetCDEsPath = QFileDialog.getOpenFileName(
        None, "Select the CDEs file", "", "Excel files (*.xlsx)"
    )
    # The first item of the dialog result is the selected file path
    cdesFile = self.targetCDEsPath[0]
    self.targetCDEsPathLabel.setText(cdesFile)
    if not os.path.exists(cdesFile):
        # Restore the placeholder text (extension spelled as in the
        # "*.xlsx" filter of the file dialog)
        self.targetCDEsPathLabel.setText(
            QCoreApplication.translate(
                f"{WINDOW_NAME}", "<Please load a CDEs file in .xlsx>", None
            )
        )
        errMsg = (
            f"The CDEs file {cdesFile} does not exist. "
            "Please select a valid file!"
        )
        QMessageBox.warning(
            None,
            "Error",
            errMsg,
        )
        self.updateStatusbar(errMsg)
        self.disableMappingMapButtons()
        self.disableMappingComponents()
        return
    # Load the CDEs and display them in the table view
    self.targetCDEs = pd.read_excel(cdesFile)
    self.targetCDEsPandasModel = PandasTableModel(self.targetCDEs)
    self.targetCDEsTableView.setModel(self.targetCDEsPandasModel)
    self.updateStatusbar(f"Loaded CDEs file {cdesFile}")
    # A mapping can only be edited once both the input dataset and the
    # CDEs are available
    if hasattr(self, "inputDatasetPath") and os.path.exists(
        self.inputDatasetPath[0]
    ):
        self.initMapping()
        self.enableMappingInitItems()
        self.enableMappingButtons()
        self.enableMappingComponents()
    else:
        self.disableMappingMapButtons()
        self.disableMappingInitItems()
        self.disableMappingComponents()
[docs] def loadMapping(self):
"""Load a Columns / CDEs mapping from a JSON file chosen by the user.

The file selected in the dialog is loaded with `load_mapping_json()` and
displayed in the mapping table view, whose first row is then selected.
The outcome is reported in a message box and in the status bar. A file
that does not exist, or a `ValueError` raised while loading it, is
reported as an error and the mapping / map buttons are disabled.
"""
# The first item of the dialog result is used as the file path below
self.mappingFilePath = QFileDialog.getOpenFileName(
None, "Select the mapping file", "", "JSON files (*.json)"
)
self.mappingFilePathLabel.setText(self.mappingFilePath[0])
if not os.path.exists(self.mappingFilePath[0]):
# Restore a placeholder text in the label and report the error
self.mappingFilePathLabel.setText(
QCoreApplication.translate(
f"{WINDOW_NAME}",
"<Please load an existing mapping json file...>",
None,
)
)
errMsg = (
f"The mapping file {self.mappingFilePath[0]} does not exist. "
"Please select a valid file!"
)
QMessageBox.warning(
None,
"Error",
errMsg,
)
self.updateStatusbar(errMsg)
self.disableMappingMapButtons()
else:
try:
# Load the mapping table file in JSON format
self.columnsCDEsMappingData = load_mapping_json(self.mappingFilePath[0])
print(f"Mapping loaded from {self.mappingFilePath[0]}")
# Create a pandas model for the mapping table
self.columnsCDEsMappingPandasModel = PandasTableModel(
self.columnsCDEsMappingData
)
# Set the model of the table view to the pandas model
self.mappingTableView.setModel(self.columnsCDEsMappingPandasModel)
# Whole rows are selected, one at a time
self.mappingTableView.setSelectionBehavior(
self.mappingTableView.SelectRows
)
self.mappingTableView.setSelectionMode(
self.mappingTableView.SingleSelection
)
self.mappingTableView.setEditTriggers(
self.mappingTableView.NoEditTriggers
) # disable editing
# Handle the mapping table view row selection changed signal
self.mappingTableView.selectionModel().currentRowChanged.connect(
self.initializeMappingEditForm
)
# Select the first row of the mapping table view at the beginning
indexRow = 0
self.mappingTableView.selectRow(indexRow)
# Handle the combo box current index changed signal for the CDE code column
# NOTE(review): this connection is made again on every call — presumably
# repeated loads stack duplicate connections; confirm
self.cdeCode.currentIndexChanged.connect(self.updateMappingEditForm)
# Display a success message
successMsg = (
f"Loaded mapping file {self.mappingFilePath[0]}. \n"
"Please Check the mapping, Save it and Click on the "
"Map button to map the input dataset."
)
QMessageBox.information(
None,
"Success",
successMsg,
)
self.updateStatusbar(successMsg)
except ValueError as e:
# Display an error message
errMsg = (
f"The mapping file {self.mappingFilePath[0]} is not valid: {repr(e)} \n"
"Please select a valid file! "
)
QMessageBox.warning(
None,
"Error",
errMsg,
)
self.updateStatusbar(errMsg)
self.disableMappingMapButtons()
# NOTE(review): the nesting level of the next call cannot be determined
# from this listing — confirm whether it runs on every path or only on
# some of them
self.enableMappingButtons()
def saveMapping(self):
    """Write the mapping table to a JSON file chosen by the user.

    A warning is shown, and nothing is written, when the selected file
    name does not have the ".json" extension. After a successful save,
    the map button is enabled.
    """
    self.mappingFilePath = QFileDialog.getSaveFileName(
        None, "Select the mapping file", "", "JSON files (*.json)"
    )
    mappingFile = self.mappingFilePath[0]
    path = Path(mappingFile)
    if path.suffix != ".json":
        QMessageBox.warning(
            None,
            "Error",
            f"The mapping file {mappingFile} does not have a .json extension. "
            "Please select a valid file!",
        )
        return
    # Create the directories if they do not exist
    os.makedirs(path.parent, exist_ok=True)
    # Convert the mapping data frame to a json file (list of records)
    self.columnsCDEsMappingData.to_json(mappingFile, orient="records", indent=4)
    print(f"Mapping saved to {mappingFile}")
    self.mappingFilePathLabel.setText(mappingFile)
    successMsg = f"Mapping saved to {mappingFile}!"
    QMessageBox.information(None, "Success", successMsg)
    self.updateStatusbar(successMsg)
    # A saved mapping can be used to map the input dataset
    self.mapButton.setEnabled(True)
def disableMappingInitItems(self):
    """Disable the mapping initialization items and both visualization buttons."""
    for itemName in (
        "mappingInitButton",
        "initMatchingMethod",
        "embeddingVizButton",
        "matchingVizButton",
    ):
        getattr(self, itemName).setEnabled(False)
def enableMappingInitItems(self):
    """Enable the mapping initialization button and the matching method selector."""
    for itemName in ("mappingInitButton", "initMatchingMethod"):
        getattr(self, itemName).setEnabled(True)
def disableMappingComponents(self):
    """Disable the mapping group box, the row editor group box and the file path label."""
    for componentName in (
        "columnsCDEsMappingGroupBox",
        "mappingTableRowUpdateGroupBox",
        "mappingFilePathLabel",
    ):
        getattr(self, componentName).setEnabled(False)
def enableMappingComponents(self):
    """Enable the mapping group box, the row editor group box and the file path label."""
    for componentName in (
        "columnsCDEsMappingGroupBox",
        "mappingTableRowUpdateGroupBox",
        "mappingFilePathLabel",
    ):
        getattr(self, componentName).setEnabled(True)
def checkMapping(self):
    """Check the content of the mapping table.

    This function checks if:

    * The mapping table contains unique mapping to a specific CDE code.
    * The mapping contains only valid CDE codes, i.e. every mapped CDE
      code is a code of the target CDEs (an empty mapping is rejected).
    * The mapping transform is correctly formatted.

    The first failing check is reported in a message box and in the status
    bar, and the mapping / map buttons are disabled. If all checks pass,
    the save button is enabled; the map button stays disabled until the
    mapping is saved.
    """
    mappingData = self.columnsCDEsMappingData

    def rejectMapping(title, errMsg):
        """Report an invalid mapping and disable the mapping / map buttons."""
        QMessageBox.warning(
            None,
            title,
            errMsg,
        )
        self.updateStatusbar(errMsg)
        self.disableMappingMapButtons()

    # Check if the mapping table contains unique mapping to a specific CDE code
    if len(mappingData["cde_code"].unique()) != len(mappingData["cde_code"]):
        rejectMapping(
            "Error: Duplicated mapped CDE code",
            "The mapping is not valid. "
            "Please check it and remove any mapping row "
            "that might map multiple columns of the input dataset "
            "to the same CDE code!",
        )
        return
    # The uniqueness of the dataset columns is deliberately NOT checked:
    # this would not allow to map a column of the input dataset to
    # different CDE codes.
    # Check if the mapping contains only valid CDE codes: every code has
    # to be known, not just one of them. An empty mapping is rejected too.
    hasOnlyValidCodes = mappingData["cde_code"].isin(self.targetCDEs["code"]).all()
    if mappingData.empty or not hasOnlyValidCodes:
        rejectMapping(
            "Error: Invalid CDE Codes",
            "The mapping is not valid. "
            "Please check it and remove any invalid CDE code!",
        )
        return
    # Check if the mapping transform is correctly formatted
    transformList = mappingData["transform"].tolist()

    def is_invalid_map_transform(transform):
        """Check if the transform is an invalid map transform.

        We expect the transform to be a valid Python dictionary or
        a valid Python literal. If it is not, it is invalid.

        Parameters
        ----------
        transform : str
            The transform to check.

        Returns
        -------
        bool
            True if the transform cannot be evaluated as a Python literal.
        """
        try:
            ast.literal_eval(f"{transform}")
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # All the errors `ast.literal_eval` is documented to raise on
            # malformed input mean that the transform is invalid
            return True
        return False

    # Pairs of (1-based mapping row, transform) of the invalid transforms
    invalidTransforms = [
        (row, transform)
        for row, transform in enumerate(transformList, start=1)
        if is_invalid_map_transform(transform)
    ]
    if invalidTransforms:
        df_invalidtransform_with_index = pd.DataFrame(
            {
                "transform": [transform for _, transform in invalidTransforms],
                "mapping_row": [row for row, _ in invalidTransforms],
            }
        )
        rejectMapping(
            "Error: Invalid Transform",
            "The mapping is not valid. "
            "Please check it and correct "
            "any invalid transform!"
            f" (invalid transforms: {df_invalidtransform_with_index})",
        )
        return
    # If the mapping is valid, display a success message
    successMsg = (
        "The mapping is valid! "
        "You can now save it and use it to map the source dataset."
    )
    QMessageBox.information(
        None,
        "Success",
        successMsg,
    )
    self.updateStatusbar(successMsg)
    self.mappingSaveButton.setEnabled(True)
    self.mapButton.setEnabled(False)
def initMapping(self):
    """Create an empty columns / CDEs mapping table and show it in the table view."""
    self.updateStatusbar(
        "The empty mapping table is being created. "
        "Please wait until the process is finished."
    )
    # Start from a mapping without any row and without any match
    self.matchedCdeCodes = None
    emptyMapping = pd.DataFrame(columns=MAPPING_TABLE_COLUMNS)
    self.columnsCDEsMappingData = emptyMapping
    # Pandas model displayed by the mapping table view
    self.columnsCDEsMappingPandasModel = PandasTableModel(emptyMapping)
    tableView = self.mappingTableView
    tableView.setModel(self.columnsCDEsMappingPandasModel)
    # Rows are selected as a whole, one at a time, and cannot be edited
    tableView.setSelectionBehavior(tableView.SelectRows)
    tableView.setSelectionMode(tableView.SingleSelection)
    tableView.setEditTriggers(tableView.NoEditTriggers)
    # Handle the row selection changed signal of the table view
    tableView.selectionModel().currentRowChanged.connect(
        self.initializeMappingEditForm
    )
    # Select the first row of the mapping table view at the beginning
    tableView.selectRow(0)
    # Handle the current index changed signal of the CDE code combo box
    self.cdeCode.currentIndexChanged.connect(self.updateMappingEditForm)
    # Show status message
    self.updateStatusbar(
        "The mapping has been created. You can now edit, validate, and save it!"
    )
def mappingMatch(self):
    """Initialize the column/CDEs mapping with the selected matching method.

    The mapping table, the matches, and the embeddings returned by
    `match_columns_to_cdes()` are stored, and the mapping table is shown in
    the table view. The matching visualization is then enabled; the
    embedding visualization is enabled only if the matching method is not
    "fuzzy".
    """
    matchingMethod = self.initMatchingMethod.currentText()
    infoMsg = (
        f"The mapping is being initialized using the {matchingMethod} method. "
        "Please wait until the process is finished."
    )
    self.updateStatusbar(infoMsg)
    # Create a first mapping table based on the selected matching method
    (
        self.columnsCDEsMappingData,
        self.matchedCdeCodes,
        self.inputDatasetColumnEmbeddings,
        self.targetCDEsEmbeddings,
    ) = match_columns_to_cdes(
        dataset=self.inputDataset,
        schema=self.targetCDEs,
        nb_kept_matches=NB_KEPT_MATCHES,
        matching_method=matchingMethod,
    )
    # Create a pandas model for the mapping table
    self.columnsCDEsMappingPandasModel = PandasTableModel(
        self.columnsCDEsMappingData
    )
    # Set the model of the table view to the pandas model
    self.mappingTableView.setModel(self.columnsCDEsMappingPandasModel)
    self.mappingTableView.setSelectionBehavior(self.mappingTableView.SelectRows)
    self.mappingTableView.setSelectionMode(self.mappingTableView.SingleSelection)
    self.mappingTableView.setEditTriggers(
        self.mappingTableView.NoEditTriggers
    )  # disable editing
    # Handle the mapping table view row selection changed signal
    self.mappingTableView.selectionModel().currentRowChanged.connect(
        self.initializeMappingEditForm
    )
    # Select the first row of the mapping table view at the beginning
    self.mappingTableView.selectRow(0)
    # Handle the combo box current index changed signal for the CDE code column
    self.cdeCode.currentIndexChanged.connect(self.updateMappingEditForm)
    # Show status message
    infoMsg = "The mapping has been created. You can now edit, check, and save it!"
    self.updateStatusbar(infoMsg)
    self.enableMappingButtons()
    # Embeddings can only be visualized for the non-fuzzy matching methods
    self.embeddingVizButton.setEnabled(matchingMethod != "fuzzy")
    self.matchingVizButton.setEnabled(True)
def selectOutputFilename(self):
    """Select the output filename.

    The ".csv" extension is appended to the selected file name when it is
    missing. `self.outputFilename` always keeps the shape of the file
    dialog result, i.e. a `(path, selected filter)` tuple, so that the
    path can be read as `self.outputFilename[0]`.

    Returns
    -------
    bool
        True if an output filename has been selected, False otherwise.
    """
    selectedPath, selectedFilter = QFileDialog.getSaveFileName(
        None, "Select the output filename", "", "CSV files (*.csv)"
    )
    self.outputFilename = (selectedPath, selectedFilter)
    if selectedPath == "":
        errMsg = "Please select a valid output filename."
        QMessageBox.warning(
            None,
            "Error",
            errMsg,
        )
        self.updateStatusbar(errMsg)
        return False
    if not selectedPath.endswith(".csv"):
        # Only the path item of the tuple is changed, never its shape
        selectedPath += ".csv"
        self.outputFilename = (selectedPath, selectedFilter)
    successMsg = "The output filename has been updated to: " + selectedPath + "."
    # `QMessageBox.information` takes a parent, a title, and a text
    QMessageBox.information(
        None,
        "Success",
        successMsg,
    )
    self.updateStatusbar(successMsg)
    return True
def updateStatusbar(self, message):
    """Show the given message in the status bar, then repaint the status bar.

    Parameters
    ----------
    message : str
        The message to show.
    """
    statusBar = self.statusbar
    statusBar.showMessage(message)
    statusBar.repaint()
def map(self):
    """Map the input dataset to the target CDEs.

    The user is first asked for the output filename. The input dataset and
    the mapping are then read from the files whose paths are displayed in
    the corresponding labels, the dataset is mapped with `map_dataset()`,
    and the result is saved as a CSV file. The map button is disabled
    while the mapping is running, and always enabled again afterwards,
    even when the mapping fails.
    """
    # Exit function if the output filename is not properly set
    if not self.selectOutputFilename():
        return
    # Check if the input dataset and the mapping file are loaded
    inputDatasetFile = self.inputDatasetPathLabel.text()
    if not os.path.exists(inputDatasetFile):
        warn_msg = "Please load the input dataset!"
        QMessageBox.warning(
            None,
            "Warning",
            warn_msg,
            QMessageBox.Ok,
        )
        self.updateStatusbar(warn_msg)
        return
    mappingFile = self.mappingFilePathLabel.text()
    if not os.path.exists(mappingFile):
        warn_msg = "Please save the mapping file or load an existing one!"
        QMessageBox.warning(
            None,
            "Warning",
            warn_msg,
            QMessageBox.Ok,
        )
        self.updateStatusbar(warn_msg)
        return
    # The output filename is the first item of the file dialog result;
    # a plain string is tolerated as well, to never index into the path
    if isinstance(self.outputFilename, str):
        outputFile = self.outputFilename
    else:
        outputFile = self.outputFilename[0]
    # Proceed with the mapping
    self.mapButton.setEnabled(False)
    try:
        self.updateStatusbar("Mapping in progress...")
        # Load the input dataset
        input_dataset = pd.read_csv(inputDatasetFile)
        # Load the mapping file
        with open(mappingFile, "r") as f:
            mapping = json.load(f)
        # Map the input dataset to the target CDEs
        output_dataset = map_dataset(
            input_dataset, mapping, self.targetCDEs["code"].tolist()
        )
        # Save the output dataset
        output_dataset.to_csv(
            outputFile,
            index=False,
        )
        # Show a message box to inform the user that the mapping has
        # been done successfully
        successMsg = (
            "The mapping has been done successfully and "
            "the output dataset has been saved to: " + outputFile + "."
        )
        QMessageBox.information(
            None,
            "Success",
            successMsg,
            QMessageBox.Ok,
        )
        self.updateStatusbar(successMsg)
    finally:
        # Never leave the map button disabled, whatever the outcome
        self.mapButton.setEnabled(True)