Source code for mip_dmp.qt5.components.matching_visualization_widget
# Copyright 2023 The HIP team, University Hospital of Lausanne (CHUV), Switzerland & Contributors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module that defines the class for the widget that supports the visualization of the distances obtained by the automated mapping matches for the n most similar CDE codes."""
# External imports
import os
import matplotlib.pyplot as plt
import pkg_resources
from matplotlib.backends.backend_qt5agg import (
FigureCanvasQTAgg as FigureCanvas,
NavigationToolbar2QT as NavigationToolbar,
)
from PySide2.QtCore import QCoreApplication
from PySide2.QtWidgets import QVBoxLayout, QWidget, QComboBox
# Internal imports
from mip_dmp.plot.matching import heatmap_matching
from mip_dmp.process.matching import make_distance_vector
# Constants
WINDOW_NAME = "Column /CDE Match Distance Visualization"
NB_KEPT_MATCHES = 15
[docs]class MatchingVisualizationWidget(QWidget):
"""Class for the widget that supports the visualization of the distances / similarity measures obtained by the automated mapping matches for the n most similar CDE codes."""
def __init__(
self,
inputDatasetColumns=None,
targetCDECodes=None,
matchedCdeCodes=None,
matchingMethod=None,
parent=None,
):
"""Initialize the widget. If parent is `None`, the widget renders as a separate window.
inputDatasetColumns: list
List of the input dataset columns.
targetCDECodes: list
List of the target CDE codes.
matchedCdeCodes: dict
Dictionary with the matched CDE codes in the following format::
{
"input_dataset_column_1": {
"words": [ "cde_code_1", "cde_code_2", ... ],
"distances": [ distance_1, distance_2, ... ],
"embeddings": [ embedding_1, embedding_2, ... ]
},
"input_dataset_column_2": {
"words": [ "cde_code_1", "cde_code_2", ... ],
"distances": [ distance_1, distance_2, ... ],
"embeddings": [ embedding_1, embedding_2, ... ]
},
...
}
matchingMethod: str
String with the matching method. Can be one of the following:
- `fuzzy`
- `chars2vec`
- `glove`
"""
super(MatchingVisualizationWidget, self).__init__(parent)
self.adjustWindow()
self.widgetLayout = QVBoxLayout()
self.setLayout(self.widgetLayout)
# Set up the combo box for selecting the word to visualize
# its dimensionaly reduced embedding vector in the 3D scatter plot
# with the ones of the CDE codes
self.wordComboBox = QComboBox()
self.widgetLayout.addWidget(self.wordComboBox)
# Set up the matplotlib figure and canvas
self.canvasLayout = QVBoxLayout()
self.figure = plt.figure(figsize=(12, 12))
self.canvas = FigureCanvas(self.figure)
self.toolbar = NavigationToolbar(self.canvas, self)
self.canvasLayout.addWidget(self.canvas)
self.canvasLayout.addWidget(self.toolbar)
self.widgetLayout.addLayout(self.canvasLayout, stretch=1)
# Initialize the class attributes (if set)
self.inputDatasetColumns = (
inputDatasetColumns if inputDatasetColumns else list()
)
self.targetCDECodes = targetCDECodes if targetCDECodes else list()
self.matchedCdeCodes = matchedCdeCodes if matchedCdeCodes else dict()
self.matchingMethod = matchingMethod if matchingMethod else None
# Connect the combo box to the function that generates the heatmap
self.wordComboBox.currentIndexChanged.connect(self.generate_heatmap_figure)
[docs] def adjustWindow(self):
"""Adjust the window size, Qt Style Sheet, and title.
Parameters
----------
mainWindow : QMainWindow
The main window of the application.
"""
# Adjust the window size
# self.resize(1280, 720)
# Set the window Qt Style Sheet
styleSheetFile = pkg_resources.resource_filename(
"mip_dmp", os.path.join("qt5", "assets", "stylesheet.qss")
)
with open(styleSheetFile, "r") as fh:
self.setStyleSheet(fh.read())
# Set the window title
self.setWindowTitle(
QCoreApplication.translate(f"{WINDOW_NAME}", f"{WINDOW_NAME}", None)
)
[docs] def set_wordcombobox_items(self, wordList):
"""Set the items of the word combo box.
Parameters
----------
wordList: list
List of the words to add to the combo box.
"""
self.wordComboBox.clear()
self.wordComboBox.addItems(wordList)
[docs] def generate_heatmap_figure(self):
"""Generate a heatmap figure with seaborn that shows the similarity / distance matrix of the input dataset columns and the target CDE codes."""
matchedCdeCodes = self.matchedCdeCodes.copy()
# Keep only the NB_KEPT_MATCHES most similar CDE codes for a variable
for key in ["words", "distances"]:
matchedCdeCodes[self.wordComboBox.currentText()][
key
] = matchedCdeCodes[self.wordComboBox.currentText()][
key
][:NB_KEPT_MATCHES]
# Generate the distance vector
distanceVector = make_distance_vector(
matchedCdeCodes, self.wordComboBox.currentText()
)
# Generate the heatmap
self.figure.clear()
self.figure = heatmap_matching(
self.figure,
distanceVector,
[
self.wordComboBox.currentText()
], # give the input dataset column only for y labels
matchedCdeCodes[self.wordComboBox.currentText()][
"words"
][:],
self.matchingMethod,
)
# Draw the figure
self.figure.canvas.draw()