First, we load the data.

Then we explore the weight and height variables.

# Exploratory histograms of the three raw anthropometric columns of `my_data`.
# Each name is a column; each value is the x-axis label used for its plot.
axis_labels <- c(
  DM21_01 = "Weight before",
  DM21_02 = "Weight after",
  DM21_03 = "Height"
)

for (column_name in names(axis_labels)) {
  hist(my_data[[column_name]],
       main = paste("Histogram of", column_name),
       xlab = axis_labels[[column_name]],
       ylab = "Frequency",
       col = "lightblue",
       border = "black")
}

Then we clean the data by removing participants with impossible weight and height values.

# Keep rows whose height (DM21_03) is above the plausibility cut-off. Rows with
# a missing height are kept as well, not dropped.
# NOTE(review): the cut-off is presumably in cm (height is divided by 100
# before squaring in the BMI step) - confirm.
min_plausible_height <- 60
height_raw <- my_data$DM21_03
keep_row <- is.na(height_raw) | height_raw > min_plausible_height
my_data_clean <- my_data[keep_row, ]

# Re-draw the height histogram on the filtered data.
hist(my_data_clean$DM21_03,
     main = "Histogram of DM21_03",
     xlab = "Height",
     ylab = "Frequency",
     col = "lightblue",
     border = "black")

Then we calculate the BMI before and the BMI after for each participant, as well as the BMI change.

# BMI = weight / height^2, where height (DM21_03) is divided by 100 first.
# The squared height is shared by both BMI values, so compute it once.
height_squared <- (my_data_clean$DM21_03 / 100)^2

# BMI before uses DM21_01 (weight before); BMI after uses DM21_02 (weight after)
my_data_clean$BMI_before <- my_data_clean$DM21_01 / height_squared
my_data_clean$BMI_after <- my_data_clean$DM21_02 / height_squared

# Show the first rows of the inputs and results as a sanity check
head(my_data_clean[, c("DM21_01", "DM21_02", "DM21_03", "BMI_before", "BMI_after")])

## # A tibble: 6 × 5
##   DM21_01 DM21_02 DM21_03 BMI_before BMI_after
##     <dbl>   <dbl>   <dbl>      <dbl>     <dbl>
## 1      NA      NA      NA       NA        NA  
## 2      75      75     179       23.4      23.4
## 3      70      65     168       24.8      23.0
## 4     112     125     177       35.7      39.9
## 5     120     120     165       44.1      44.1
## 6      NA      NA      NA       NA        NA

# BMI change = BMI after - BMI before (positive values mean BMI increased).
# Both inputs are already numeric, so the difference needs no conversion; a
# round trip through as.character() would only risk truncating digits.
my_data_clean$BMI_change <- my_data_clean$BMI_after - my_data_clean$BMI_before

# Histogram of BMI changes
hist(my_data_clean$BMI_change,
     main = "Histogram of BMI Changes",
     xlab = "BMI Change (After - Before)",
     col = "lightblue",
     border = "black")

Then we check the EQ5D-VAS variable (Gesundheitszustand: Ihr heutiger Gesundheitszustand) and remove missing values from the data.

# Coerce EQ_08_01 to numeric by way of character, so that a factor column
# yields its labels rather than its internal level codes.
eq_vas_text <- as.character(my_data_clean$EQ_08_01)
my_data_clean$EQ_08_01 <- as.numeric(eq_vas_text)

# Frequency table of the converted values
table(my_data_clean$EQ_08_01)

## 
##  -9   1   5   6   7   8   9  10  11  12  13  14  16  17  18  19  20  21  22  23 
##   5   1   1   1   2   1   2   1   5   1   1   2   5   7   9   2   3   8   5   4 
##  24  25  26  27  28  29  31  32  33  34  35  36  37  38  39  40  41  42  43  44 
##   2   6  10   3   2   1   8   7   9  18  11   9   3   3   1   3  14   7   9   5 
##  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  65 
##   3   5   1   3   2   6  31   3   2   1   1   3   3   2   4   3   8   5   1   2 
##  66  67  68  69  70  71  72  73  74  75  76  77  79  80  81  82  83  84  85  86 
##   5  13   4   5   5   5   1   2   3   2   5   1   1   2   4   2   5   6   6   3 
##  87  89  90  91  92  96  97 100 101 
##   4   1   3   1   2   5   2   2   6

# Keep only rows with a recorded, positive EQ_08_01 (this removes the -9 codes).
# The explicit !is.na() matters: an NA in a logical row index does not drop the
# row, it returns a row in which every column is NA.
has_valid_eq <- !is.na(my_data_clean$EQ_08_01) & my_data_clean$EQ_08_01 > 0
my_data_clean <- my_data_clean[has_valid_eq, ]

First, we look at a scatter plot to visualize the relationship between BMI change and the EQ5D-VAS variable (heutiger Gesundheitszustand). I also added a linear regression line to the plot to give an impression of the relationship between the variables.

EQ5D-VAS current score

BMI before vs EQ5D-VAS current

Now we look at the correlation between the BMI before Covid and the current EQ5D-VAS score.

# Scatter plot of BMI before (y) against the current EQ_08_01 score (x), with
# the least-squares line of BMI_before on EQ_08_01 overlaid in red.
plot(my_data_clean$EQ_08_01, my_data_clean$BMI_before,
     main = "Scatter Plot of Gesundheitszustand current vs. BMI before",
     xlab = "Gesundheitszustand current",
     ylab = "BMI before",
     col = "blue", pch = 16)
abline(lm(BMI_before ~ EQ_08_01, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. `use` is an argument of cor(), not of cor.test(),
# where it is silently swallowed by `...`; cor.test() already restricts itself
# to complete pairs.
cor_test_BMIbefore_EQ5D_after <- cor.test(my_data_clean$EQ_08_01,
                                          my_data_clean$BMI_before,
                                          method = "pearson")
print(cor_test_BMIbefore_EQ5D_after)

## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$EQ_08_01 and my_data_clean$BMI_before
## t = -1.2134, df = 381, p-value = 0.2257
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1612464  0.0384031
## sample estimates:
##         cor 
## -0.06204225

As you can see, there is no statistically significant correlation between the BMI before Covid and the current EQ5D-VAS score.

BMI after vs EQ5D-VAS current

# Scatter plot of BMI after (y) against the current EQ_08_01 score (x), with
# the least-squares line of BMI_after on EQ_08_01 overlaid in red.
plot(my_data_clean$EQ_08_01, my_data_clean$BMI_after,
     main = "Scatter Plot of Gesundheitszustand current vs. BMI after",
     xlab = "Gesundheitszustand current",
     ylab = "BMI after",
     col = "blue", pch = 16)
abline(lm(BMI_after ~ EQ_08_01, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. `use` is an argument of cor(), not of cor.test(),
# where it is silently swallowed by `...`; cor.test() already restricts itself
# to complete pairs.
cor_test_BMIafter_EQ5D_after <- cor.test(my_data_clean$EQ_08_01,
                                         my_data_clean$BMI_after,
                                         method = "pearson")
print(cor_test_BMIafter_EQ5D_after)

## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$EQ_08_01 and my_data_clean$BMI_after
## t = -1.2441, df = 380, p-value = 0.2142
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.16298800  0.03688173
## sample estimates:
##         cor 
## -0.06369179

BMI change vs EQ5D-VAS Current

# Scatter plot of BMI change (y) against the current EQ_08_01 score (x)
with(my_data_clean,
     plot(EQ_08_01, BMI_change,
          main = "Scatter Plot of Gesundheitszustand vs. BMI Change",
          xlab = "Gesundheitszustand",
          ylab = "BMI Change",
          col = "blue", pch = 16))

# Overlay the least-squares line of BMI_change on EQ_08_01 in red
trend_fit <- lm(BMI_change ~ EQ_08_01, data = my_data_clean)
abline(trend_fit, col = "red", lwd = 2)

Then we continue by calculating the Pearson correlation between BMI change and EQ5D-VAS (heutiger Gesundheitszustand).

# Pearson correlation test between current EQ_08_01 and BMI change. `use` is an
# argument of cor(), not of cor.test(), where it is silently swallowed by
# `...`; cor.test() already restricts itself to complete pairs.
cor_test <- cor.test(my_data_clean$EQ_08_01, my_data_clean$BMI_change,
                     method = "pearson")
print(cor_test)

## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$EQ_08_01 and my_data_clean$BMI_change
## t = -0.39603, df = 380, p-value = 0.6923
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.12040409  0.08018958
## sample estimates:
##         cor 
## -0.02031166

So from this test we can conclude that there is no statistically significant linear correlation between BMI change and EQ5D-VAS (Gesundheitszustand).

EQ5D-VAS score change (before corona and current)

Next, we look at the correlation between changes in BMI and changes in EQ5D-VAS status between pre-Covid and now.

# Convert DM20_01 to numeric BEFORE filtering on it, so that `> 0` is a numeric
# comparison even if the column arrives as character or factor.
my_data_clean$DM20_01 <- as.numeric(as.character(my_data_clean$DM20_01))

# Keep only rows with a recorded, positive DM20_01. The explicit !is.na()
# matters: an NA in a logical row index returns an all-NA row instead of
# dropping the row.
# NOTE(review): this overwrites my_data_clean, so every analysis that follows
# is also limited to rows with a valid DM20_01 - confirm that is intended.
has_valid_pre <- !is.na(my_data_clean$DM20_01) & my_data_clean$DM20_01 > 0
my_data_clean <- my_data_clean[has_valid_pre, ]

# Change score: current EQ_08_01 minus DM20_01 (negative values mean the
# current score is lower)
my_data_clean$health_status_change <- my_data_clean$EQ_08_01 - my_data_clean$DM20_01

hist(my_data_clean$health_status_change,
     main = "Histogram of health status Changes",
     xlab = "health status (Now - before Covid)",
     col = "lightblue",
     border = "black")

As we might guess, participants reported that their health condition got worse after Covid.

BMI before vs EQ5D-VAS change

Let's look at the correlation between the BMI before Covid and the change in EQ5D-VAS status between pre-Covid and now.

# Scatter plot of BMI before (y) against the change in health status (x), with
# the least-squares line of BMI_before on health_status_change in red.
plot(my_data_clean$health_status_change, my_data_clean$BMI_before,
     main = "Scatter Plot of Gesundheitszustand change vs. BMI before",
     xlab = "Gesundheitszustand change",
     ylab = "BMI before",
     col = "blue", pch = 16)
abline(lm(BMI_before ~ health_status_change, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. `use` is an argument of cor(), not of cor.test(),
# where it is silently swallowed by `...`; cor.test() already restricts itself
# to complete pairs.
cor_test_before <- cor.test(my_data_clean$health_status_change,
                            my_data_clean$BMI_before,
                            method = "pearson")
print(cor_test_before)

## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$health_status_change and my_data_clean$BMI_before
## t = 1.4715, df = 359, p-value = 0.142
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.02599819  0.17921409
## sample estimates:
##        cor 
## 0.07742798

Here we couldn't find any statistically significant correlation between the BMI before Covid and the change in EQ5D-VAS score.

BMI after vs EQ5D-VAS change

# Scatter plot of BMI after (y) against the change in health status (x), with
# the least-squares line of BMI_after on health_status_change in red.
plot(my_data_clean$health_status_change, my_data_clean$BMI_after,
     main = "Scatter Plot of Gesundheitszustand change vs. BMI after",
     xlab = "Gesundheitszustand change",
     ylab = "BMI after",
     col = "blue", pch = 16)
abline(lm(BMI_after ~ health_status_change, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. `use` is an argument of cor(), not of cor.test(),
# where it is silently swallowed by `...`; cor.test() already restricts itself
# to complete pairs.
cor_test_after <- cor.test(my_data_clean$health_status_change,
                           my_data_clean$BMI_after,
                           method = "pearson")
print(cor_test_after)

## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$health_status_change and my_data_clean$BMI_after
## t = 1.1642, df = 358, p-value = 0.2451
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.04221429  0.16373792
## sample estimates:
##        cor 
## 0.06141551

Let's see whether there is a correlation between the change in participants' health condition and the change in their BMI during the period.

BMI change vs EQ5D-VAS change

# Scatter plot of BMI change (y) against the change in health status (x), with
# the least-squares line of BMI_change on health_status_change in red.
plot(my_data_clean$health_status_change, my_data_clean$BMI_change,
     main = "Scatter Plot of Gesundheitszustand change vs. BMI Change",
     xlab = "Gesundheitszustand change",
     ylab = "BMI Change",
     col = "blue", pch = 16)
abline(lm(BMI_change ~ health_status_change, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. `use` is an argument of cor(), not of cor.test(),
# where it is silently swallowed by `...`; cor.test() already restricts itself
# to complete pairs.
cor_test_HC <- cor.test(my_data_clean$health_status_change,
                        my_data_clean$BMI_change,
                        method = "pearson")
print(cor_test_HC)

## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$health_status_change and my_data_clean$BMI_change
## t = -0.62203, df = 358, p-value = 0.5343
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1357585  0.0707443
## sample estimates:
##         cor 
## -0.03285779

Again, no statistically significant correlation was found between the two variables.

Let's dive into the IMET scale. I first calculate the total IMET score based on IMET questions 1 to 8 (I only include participants who have answered all of the questions).

# IMET items ME01_01 ... ME01_08
variables_to_convert <- paste0("ME01_0", 1:8)

# Convert each item to numeric, treat -9 as missing, and shift the coding from
# 1-11 to 0-10.
# as.character() comes first so that a factor column yields its labels rather
# than its internal level codes.
# NOTE: this step is not idempotent - running it twice subtracts 1 twice.
my_data_clean[variables_to_convert] <- lapply(
  my_data_clean[variables_to_convert],
  function(column) {
    numeric_column <- as.numeric(as.character(column))
    numeric_column[numeric_column %in% -9] <- NA  # -9 becomes NA
    numeric_column - 1                            # 1-11 becomes 0-10
  }
)

# Total IMET score per participant: the sum of the selected items.
# rowSums() keeps its default na.rm = FALSE, so a participant with any missing
# item gets NA instead of a partial sum. It replaces a row-wise apply(), which
# coerces the data frame to a matrix and calls an R function once per row.
my_data_clean$ME01_sum <- rowSums(my_data_clean[variables_to_convert])

IMET standard (only 8 main questions)

BMI before vs IMET standard

# Scatter plot of BMI before (y) against the IMET total (x), with the
# least-squares line of BMI_before on ME01_sum in red. The title names the
# IMET total because that, not Gesundheitszustand, is the x variable here.
plot(my_data_clean$ME01_sum, my_data_clean$BMI_before,
     main = "Scatter Plot of IMET total vs. BMI before",
     xlab = "IMET total",
     ylab = "BMI before",
     col = "blue", pch = 16)
abline(lm(BMI_before ~ ME01_sum, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. `use` is an argument of cor(), not of cor.test(),
# where it is silently swallowed by `...`; cor.test() already restricts itself
# to complete pairs.
cor_test_IMET <- cor.test(my_data_clean$ME01_sum, my_data_clean$BMI_before,
                          method = "pearson")
print(cor_test_IMET)

## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$ME01_sum and my_data_clean$BMI_before
## t = 2.7312, df = 345, p-value = 0.006634
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04082206 0.24697911
## sample estimates:
##       cor 
## 0.1454794

The Pearson correlation analysis shows a weak positive relationship between ME01_sum and BMI_before (r = 0.145, p = 0.007). The p-value indicates that the correlation is statistically significant at the 0.05 level, suggesting that as BMI_before increases, there is a slight tendency for ME01_sum to increase (participants who had a higher BMI before Covid now have a worse IMET score).

BMI after vs IMET standard

# Scatter plot of BMI after (y) against the IMET total (x), with the
# least-squares line of BMI_after on ME01_sum in red. The title names the
# IMET total because that, not Gesundheitszustand, is the x variable here.
plot(my_data_clean$ME01_sum, my_data_clean$BMI_after,
     main = "Scatter Plot of IMET total vs. BMI after",
     xlab = "IMET total",
     ylab = "BMI after",
     col = "blue", pch = 16)
abline(lm(BMI_after ~ ME01_sum, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. `use` is an argument of cor(), not of cor.test(),
# where it is silently swallowed by `...`; cor.test() already restricts itself
# to complete pairs.
cor_test_IMET2 <- cor.test(my_data_clean$ME01_sum, my_data_clean$BMI_after,
                           method = "pearson")
print(cor_test_IMET2)

## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$ME01_sum and my_data_clean$BMI_after
## t = 2.7936, df = 344, p-value = 0.005504
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04420032 0.25044296
## sample estimates:
##      cor 
## 0.148941

Again, we see a similar pattern here between the BMI after and the total IMET score.

BMI change vs IMET standard

# Scatter plot of BMI change (y) against the IMET total (x), with the
# least-squares line of BMI_change on ME01_sum in red. The title names the
# IMET total because that, not Gesundheitszustand, is the x variable here.
plot(my_data_clean$ME01_sum, my_data_clean$BMI_change,
     main = "Scatter Plot of IMET total vs. BMI Change",
     xlab = "IMET total",
     ylab = "BMI Change",
     col = "blue", pch = 16)
abline(lm(BMI_change ~ ME01_sum, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. `use` is an argument of cor(), not of cor.test(),
# where it is silently swallowed by `...`; cor.test() already restricts itself
# to complete pairs.
cor_test_IMET3 <- cor.test(my_data_clean$ME01_sum, my_data_clean$BMI_change,
                           method = "pearson")
print(cor_test_IMET3)

## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$ME01_sum and my_data_clean$BMI_change
## t = 0.64114, df = 344, p-value = 0.5219
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.07114663  0.13947408
## sample estimates:
##        cor 
## 0.03454732

But for BMI change, we don't find any statistically significant correlation with the total IMET score.

# FAS preprocessing: convert the ten items, reverse-score items 4 and 10, and
# build the total score. Only base R is used, so no package has to be attached
# for this chunk to run.

# Step 1: Find the columns named FA01_01 to FA01_10
variables_to_convert2 <- grep("^FA01_0[1-9]$|^FA01_10$", names(my_data_clean), value = TRUE)

# Step 2: Convert to numeric and replace -9 with NA.
# as.character() comes first so that a factor column yields its labels rather
# than its internal level codes.
my_data_clean[variables_to_convert2] <- lapply(
  my_data_clean[variables_to_convert2],
  function(column) {
    numeric_column <- as.numeric(as.character(column))
    numeric_column[numeric_column %in% -9] <- NA
    numeric_column
  }
)

# Step 3: Reverse-score questions 4 and 10 (1 <-> 5, 2 <-> 4, 3 stays 3).
# Values outside 1-5, including NA, are passed through unchanged.
reversed_items <- c("FA01_04", "FA01_10")
my_data_clean[reversed_items] <- lapply(
  my_data_clean[reversed_items],
  function(item) ifelse(item %in% 1:5, 6 - item, item)
)

# Step 4: Total FAS score, only for participants who answered all questions.
# rowSums() keeps its default na.rm = FALSE, so any missing item gives NA.
my_data_clean$FAS_total <- rowSums(my_data_clean[variables_to_convert2])

FAS

BMI before vs FAS

# Scatter plot of BMI before (y) against the FAS total (x), with the
# least-squares line of BMI_before on FAS_total in red.
plot(my_data_clean$FAS_total, my_data_clean$BMI_before,
     main = "Scatter Plot of FAS total vs. BMI before",
     xlab = "FAS_total",
     ylab = "BMI before",
     col = "blue", pch = 16)
abline(lm(BMI_before ~ FAS_total, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. `use` is an argument of cor(), not of cor.test(),
# where it is silently swallowed by `...`; cor.test() already restricts itself
# to complete pairs.
cor_test_FAS_BMIbefore <- cor.test(my_data_clean$FAS_total,
                                   my_data_clean$BMI_before,
                                   method = "pearson")
print(cor_test_FAS_BMIbefore)

## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$FAS_total and my_data_clean$BMI_before
## t = 3.5779, df = 340, p-value = 0.0003966
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.08617858 0.29066652
## sample estimates:
##      cor 
## 0.190488

we found a weak positive relationship between BMI before corona and FAS score (r = 0.190, p = 0.0004). The significant p-value and 95% CI (0.086, 0.291) confirm a small but positive association.

BMI after vs FAS

# Scatter plot of BMI after (y) against the FAS total (x), with the
# least-squares line of BMI_after on FAS_total in red.
plot(my_data_clean$FAS_total, my_data_clean$BMI_after,
     main = "Scatter Plot of FAS total vs. BMI after",
     xlab = "FAS_total",
     ylab = "BMI after",
     col = "blue", pch = 16)
abline(lm(BMI_after ~ FAS_total, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. `use` is an argument of cor(), not of cor.test(),
# where it is silently swallowed by `...`; cor.test() already restricts itself
# to complete pairs.
cor_test_FAS_BMIafter <- cor.test(my_data_clean$FAS_total,
                                  my_data_clean$BMI_after,
                                  method = "pearson")
print(cor_test_FAS_BMIafter)

## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$FAS_total and my_data_clean$BMI_after
## t = 3.7274, df = 339, p-value = 0.0002267
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.09419661 0.29833659
## sample estimates:
##       cor 
## 0.1984175

There is also a weak but statistically significant correlation between the BMI after Covid and the FAS score.

BMI change vs FAS

# Scatter plot of BMI change (y) against the FAS total (x), with the
# least-squares line of BMI_change on FAS_total in red.
plot(my_data_clean$FAS_total, my_data_clean$BMI_change,
     main = "Scatter Plot of FAS total vs. BMI change",
     xlab = "FAS_total",
     ylab = "BMI change",
     col = "blue", pch = 16)
abline(lm(BMI_change ~ FAS_total, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. `use` is an argument of cor(), not of cor.test(),
# where it is silently swallowed by `...`; cor.test() already restricts itself
# to complete pairs.
cor_test_FAS_BMIchange <- cor.test(my_data_clean$FAS_total,
                                   my_data_clean$BMI_change,
                                   method = "pearson")
print(cor_test_FAS_BMIchange)

## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$FAS_total and my_data_clean$BMI_change
## t = 1.1044, df = 339, p-value = 0.2702
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.04662708  0.16503184
## sample estimates:
##        cor 
## 0.05987536

There is no statistically significant correlation between BMI change and the FAS score.

IMET vs EQ5D-VAS current

Finally, I wanted to check whether there is a strong correlation between the IMET total and EQ5D-VAS. We expect a strong negative correlation, because higher IMET values indicate a worse health condition while higher EQ5D-VAS values indicate a better one.

# Scatter plot of the IMET sum (y) against the current EQ_08_01 score (x)
with(my_data_clean,
     plot(EQ_08_01, ME01_sum,
          main = "Scatter Plot of Gesundheitszustand vs. IMET sum",
          xlab = "Gesundheitszustand",
          ylab = "IMET sum",
          col = "blue", pch = 16))

# Overlay the least-squares line of ME01_sum on EQ_08_01 in red
imet_trend_fit <- lm(ME01_sum ~ EQ_08_01, data = my_data_clean)
abline(imet_trend_fit, col = "red", lwd = 2)

As we can see, the correlation is as expected, so we can continue with our research question about the relation between BMI (before, after, and change) and the IMET scale.

Defeat BMI and Psychological Variables

2025-01-15