First, we load the data.

Then we explore the weight and height variables.

# Draw one histogram per raw body-measurement column as a first look at the
# distributions. The map pairs each column name with its x-axis label.
histogram_labels <- c(
  DM21_01 = "Weight before",
  DM21_02 = "Weight after",
  DM21_03 = "Height"
)

for (column_name in names(histogram_labels)) {
  hist(
    my_data[[column_name]],
    main = paste("Histogram of", column_name),
    xlab = histogram_labels[[column_name]],
    ylab = "Frequency",
    col = "lightblue",
    border = "black"
  )
}

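Then we clean the data by removing participants with impossible weight and height values. (The filter below is applied to height, DM21_03: values of 60 or less are dropped, while missing heights are kept.)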

# Keep rows whose height (DM21_03) is either missing or above 60; every other
# row is dropped from the cleaned data set.
my_data_clean <- my_data[with(my_data, is.na(DM21_03) | DM21_03 > 60), ]

# Re-draw the height histogram, this time on the cleaned data.
hist(
  my_data_clean[["DM21_03"]],
  border = "black",
  col = "lightblue",
  ylab = "Frequency",
  xlab = "Height",
  main = "Histogram of DM21_03"
)

Then we calculate the BMI before and the BMI after for each participant, as well as the BMI change.

# BMI = weight / (height / 100)^2, using DM21_03 as height for both time
# points. The division by 100 assumes height is recorded in centimetres and
# weight in kilograms -- TODO confirm against the questionnaire.

# BMI before: weight before (DM21_01) and height (DM21_03)
my_data_clean$BMI_before <- with(my_data_clean, DM21_01 / (DM21_03 / 100)^2)

# BMI after: weight after (DM21_02) and height (DM21_03)
my_data_clean$BMI_after <- with(my_data_clean, DM21_02 / (DM21_03 / 100)^2)

# Show the inputs next to the derived columns for the first rows as a sanity
# check of the calculation
my_data_clean[, c("DM21_01", "DM21_02", "DM21_03", "BMI_before", "BMI_after")] |>
  head()
## # A tibble: 6 × 5
##   DM21_01 DM21_02 DM21_03 BMI_before BMI_after
##     <dbl>   <dbl>   <dbl>      <dbl>     <dbl>
## 1      NA      NA      NA       NA        NA  
## 2      75      75     179       23.4      23.4
## 3      70      65     168       24.8      23.0
## 4     112     125     177       35.7      39.9
## 5     120     120     165       44.1      44.1
## 6      NA      NA      NA       NA        NA
# Calculate BMI change (after - before); positive values mean the BMI went up.
# The difference of two numeric columns is already numeric, so no
# as.numeric(as.character()) round-trip is applied: it would be a no-op at
# best and could truncate the doubles to 15 significant digits.
my_data_clean$BMI_change <- my_data_clean$BMI_after - my_data_clean$BMI_before


# Histogram of BMI changes
hist(my_data_clean$BMI_change,
     main = "Histogram of BMI Changes",
     xlab = "BMI Change (After - Before)",
     col = "lightblue",
     border = "black")

Then we check the EQ5D-VAS variable (Gesundheitszustand: Ihr heutiger Gesundheitszustand) and remove missing values (coded as -9) from the data.

# Coerce the EQ5D-VAS item to numeric. Going through character first means a
# factor column would yield its labels rather than its internal level codes.
my_data_clean[["EQ_08_01"]] <- as.numeric(
  as.character(my_data_clean[["EQ_08_01"]])
)

# Frequency table of all observed values, to spot special codes such as -9
table(my_data_clean[["EQ_08_01"]])
## 
##  -9   1   5   6   7   8   9  10  11  12  13  14  16  17  18  19  20  21  22  23 
##   5   1   1   1   2   1   2   1   5   1   1   2   5   7   9   2   3   8   5   4 
##  24  25  26  27  28  29  31  32  33  34  35  36  37  38  39  40  41  42  43  44 
##   2   6  10   3   2   1   8   7   9  18  11   9   3   3   1   3  14   7   9   5 
##  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  65 
##   3   5   1   3   2   6  31   3   2   1   1   3   3   2   4   3   8   5   1   2 
##  66  67  68  69  70  71  72  73  74  75  76  77  79  80  81  82  83  84  85  86 
##   5  13   4   5   5   5   1   2   3   2   5   1   1   2   4   2   5   6   6   3 
##  87  89  90  91  92  96  97 100 101 
##   4   1   3   1   2   5   2   2   6
# Keep only participants with a valid (positive) EQ5D-VAS value. This drops the
# -9 code as well as missing values. The explicit !is.na() matters: indexing
# rows with a logical vector that contains NA does not remove those rows, it
# inserts rows in which every column is NA.
my_data_clean <- my_data_clean[
  !is.na(my_data_clean$EQ_08_01) & my_data_clean$EQ_08_01 > 0,
]

First, we look at scatter plots to visualize the relationship between the BMI variables and the EQ5D-VAS variable (heutiger Gesundheitszustand). A linear regression line is added to each plot to give an impression of the relationship between the variables.

EQ5D-VAS current score

BMI before vs EQ5D-VAS current

Now we look at the correlation between the BMI before Covid and the current EQ5D-VAS score.

# Scatter plot of the current EQ5D-VAS score (x) against BMI before (y); the
# red line is the least-squares fit of BMI_before on EQ_08_01.
plot(my_data_clean$EQ_08_01, my_data_clean$BMI_before,
     main = "Scatter Plot of Gesundheitszustand current vs. BMI before",
     xlab = "Gesundheitszustand current",
     ylab = "BMI before",
     col = "blue", pch = 16)
abline(lm(BMI_before ~ EQ_08_01, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. cor.test() has no `use` argument (it would be
# silently swallowed by `...`); it always drops incomplete pairs itself.
cor_test_BMIbefore_EQ5D_after <- cor.test(
  my_data_clean$EQ_08_01,
  my_data_clean$BMI_before,
  method = "pearson"
)
print(cor_test_BMIbefore_EQ5D_after)
## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$EQ_08_01 and my_data_clean$BMI_before
## t = -1.2134, df = 381, p-value = 0.2257
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1612464  0.0384031
## sample estimates:
##         cor 
## -0.06204225

As you can see, there is no statistically significant correlation between the BMI before Covid and the current EQ5D-VAS score.

BMI after vs EQ5D-VAS current

# Scatter plot of the current EQ5D-VAS score (x) against BMI after (y); the
# red line is the least-squares fit of BMI_after on EQ_08_01.
plot(my_data_clean$EQ_08_01, my_data_clean$BMI_after,
     main = "Scatter Plot of Gesundheitszustand current vs. BMI after",
     xlab = "Gesundheitszustand current",
     ylab = "BMI after",
     col = "blue", pch = 16)
abline(lm(BMI_after ~ EQ_08_01, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. cor.test() has no `use` argument (it would be
# silently swallowed by `...`); it always drops incomplete pairs itself.
cor_test_BMIafter_EQ5D_after <- cor.test(
  my_data_clean$EQ_08_01,
  my_data_clean$BMI_after,
  method = "pearson"
)
print(cor_test_BMIafter_EQ5D_after)
## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$EQ_08_01 and my_data_clean$BMI_after
## t = -1.2441, df = 380, p-value = 0.2142
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.16298800  0.03688173
## sample estimates:
##         cor 
## -0.06369179

BMI change vs EQ5D-VAS Current

# Scatter plot of the current EQ5D-VAS score (x) against the BMI change (y);
# the red line is the least-squares fit of BMI_change on EQ_08_01.
with(
  my_data_clean,
  plot(
    EQ_08_01, BMI_change,
    pch = 16, col = "blue",
    ylab = "BMI Change",
    xlab = "Gesundheitszustand",
    main = "Scatter Plot of Gesundheitszustand vs. BMI Change"
  )
)
abline(
  reg = lm(BMI_change ~ EQ_08_01, data = my_data_clean),
  lwd = 2, col = "red"
)

Then we continue by calculating the Pearson correlation between the BMI change and the EQ5D-VAS (heutiger Gesundheitszustand).

# Pearson correlation test between the current EQ5D-VAS score and the BMI
# change. cor.test() has no `use` argument (it would be silently swallowed by
# `...`); it always drops incomplete pairs itself.
cor_test <- cor.test(
  my_data_clean$EQ_08_01,
  my_data_clean$BMI_change,
  method = "pearson"
)
print(cor_test)
## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$EQ_08_01 and my_data_clean$BMI_change
## t = -0.39603, df = 380, p-value = 0.6923
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.12040409  0.08018958
## sample estimates:
##         cor 
## -0.02031166

From this test we conclude that there is no statistically significant correlation between the BMI change and the EQ5D-VAS (Gesundheitszustand).

EQ5D-VAS score change (before corona and current)

Then we look at the correlation between the change in BMI and the change in EQ5D-VAS status between pre-Covid and now.

# Convert the pre-Covid health rating to numeric BEFORE filtering on it, so the
# `> 0` comparison below is numeric even if the column was read as character
# or factor.
my_data_clean$DM20_01 <- as.numeric(as.character(my_data_clean$DM20_01))

# Keep only participants with a valid (positive) pre-Covid rating. The explicit
# !is.na() matters: indexing rows with a logical vector that contains NA does
# not remove those rows, it inserts rows in which every column is NA.
my_data_clean <- my_data_clean[
  !is.na(my_data_clean$DM20_01) & my_data_clean$DM20_01 > 0,
]

# Change in health status: current rating minus pre-Covid rating, so negative
# values mean the rating is lower now than before Covid.
my_data_clean$health_status_change <- my_data_clean$EQ_08_01 - my_data_clean$DM20_01

hist(my_data_clean$health_status_change,
     main = "Histogram of health status Changes",
     xlab = "health status (Now - before Covid)",
     col = "lightblue",
     border = "black")

As we might expect, participants reported that their health condition got worse after Covid.

BMI before vs EQ5D-VAS change

Let's see what happens when we look at the correlation between the BMI before Covid and the change in EQ5D-VAS status between pre-Covid and now.

# Scatter plot of the health-status change (x) against BMI before (y); the red
# line is the least-squares fit of BMI_before on health_status_change.
plot(my_data_clean$health_status_change, my_data_clean$BMI_before,
     main = "Scatter Plot of Gesundheitszustand change vs. BMI before",
     xlab = "Gesundheitszustand change",
     ylab = "BMI before",
     col = "blue", pch = 16)
abline(lm(BMI_before ~ health_status_change, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. cor.test() has no `use` argument (it would be
# silently swallowed by `...`); it always drops incomplete pairs itself.
cor_test_before <- cor.test(
  my_data_clean$health_status_change,
  my_data_clean$BMI_before,
  method = "pearson"
)
print(cor_test_before)
## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$health_status_change and my_data_clean$BMI_before
## t = 1.4715, df = 359, p-value = 0.142
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.02599819  0.17921409
## sample estimates:
##        cor 
## 0.07742798

Here we could not find any statistically significant correlation between the BMI before Covid and the change in EQ5D-VAS score.

BMI after vs EQ5D-VAS change

# Scatter plot of the health-status change (x) against BMI after (y); the red
# line is the least-squares fit of BMI_after on health_status_change.
plot(my_data_clean$health_status_change, my_data_clean$BMI_after,
     main = "Scatter Plot of Gesundheitszustand change vs. BMI after",
     xlab = "Gesundheitszustand change",
     ylab = "BMI after",
     col = "blue", pch = 16)
abline(lm(BMI_after ~ health_status_change, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. cor.test() has no `use` argument (it would be
# silently swallowed by `...`); it always drops incomplete pairs itself.
cor_test_after <- cor.test(
  my_data_clean$health_status_change,
  my_data_clean$BMI_after,
  method = "pearson"
)
print(cor_test_after)
## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$health_status_change and my_data_clean$BMI_after
## t = 1.1642, df = 358, p-value = 0.2451
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.04221429  0.16373792
## sample estimates:
##        cor 
## 0.06141551

Let's see whether there is a correlation between the change in participants' health condition and the change in their BMI during the period.

BMI change vs EQ5D-VAS change

# Scatter plot of the health-status change (x) against the BMI change (y); the
# red line is the least-squares fit of BMI_change on health_status_change.
plot(my_data_clean$health_status_change, my_data_clean$BMI_change,
     main = "Scatter Plot of Gesundheitszustand change vs. BMI Change",
     xlab = "Gesundheitszustand change",
     ylab = "BMI Change",
     col = "blue", pch = 16)
abline(lm(BMI_change ~ health_status_change, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. cor.test() has no `use` argument (it would be
# silently swallowed by `...`); it always drops incomplete pairs itself.
cor_test_HC <- cor.test(
  my_data_clean$health_status_change,
  my_data_clean$BMI_change,
  method = "pearson"
)
print(cor_test_HC)
## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$health_status_change and my_data_clean$BMI_change
## t = -0.62203, df = 358, p-value = 0.5343
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1357585  0.0707443
## sample estimates:
##         cor 
## -0.03285779

Again, no statistically significant correlation was found between the two variables.

Let's dive into the IMET scale. I start by calculating the total IMET score based on IMET questions 1 to 8 (I only include participants who have answered all of the questions).

# IMET items 1 to 8
variables_to_convert <- paste0("ME01_0", 1:8)

# Convert each item to numeric, replace the -9 code with NA, and recode the
# raw values 1-11 to 0-10. The conversion goes through as.character() (as for
# the other questionnaire items in this script) so that a factor column yields
# its labels rather than its internal level codes.
my_data_clean[variables_to_convert] <- lapply(
  my_data_clean[variables_to_convert],
  function(item) {
    score <- as.numeric(as.character(item))
    score[score %in% -9] <- NA
    score - 1
  }
)

# Total IMET score per participant. rowSums() keeps na.rm = FALSE by default,
# so a participant with any unanswered item gets NA instead of a partial sum.
# It also always returns a numeric vector and avoids apply() on a data frame.
my_data_clean$ME01_sum <- unname(rowSums(my_data_clean[variables_to_convert]))

IMET standard (only 8 main questions)

BMI before vs IMET standard

# Scatter plot of the IMET total score (x) against BMI before (y); the red
# line is the least-squares fit of BMI_before on ME01_sum. The title names the
# IMET total, which is the variable actually shown on the x axis.
plot(my_data_clean$ME01_sum, my_data_clean$BMI_before,
     main = "Scatter Plot of IMET total vs. BMI before",
     xlab = "IMET total",
     ylab = "BMI before",
     col = "blue", pch = 16)
abline(lm(BMI_before ~ ME01_sum, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. cor.test() has no `use` argument (it would be
# silently swallowed by `...`); it always drops incomplete pairs itself.
cor_test_IMET <- cor.test(
  my_data_clean$ME01_sum,
  my_data_clean$BMI_before,
  method = "pearson"
)
print(cor_test_IMET)
## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$ME01_sum and my_data_clean$BMI_before
## t = 2.7312, df = 345, p-value = 0.006634
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04082206 0.24697911
## sample estimates:
##       cor 
## 0.1454794

The Pearson correlation analysis shows a weak positive relationship between ME01_sum and BMI_before (r = 0.145, p = 0.007). The p-value indicates that the correlation is statistically significant at the 0.05 level, suggesting that as BMI_before increases, there is a slight tendency for ME01_sum to increase (participants who had a higher BMI before Covid now have a worse IMET score).

BMI after vs IMET standard

# Scatter plot of the IMET total score (x) against BMI after (y); the red line
# is the least-squares fit of BMI_after on ME01_sum. The title names the IMET
# total, which is the variable actually shown on the x axis.
plot(my_data_clean$ME01_sum, my_data_clean$BMI_after,
     main = "Scatter Plot of IMET total vs. BMI after",
     xlab = "IMET total",
     ylab = "BMI after",
     col = "blue", pch = 16)
abline(lm(BMI_after ~ ME01_sum, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. cor.test() has no `use` argument (it would be
# silently swallowed by `...`); it always drops incomplete pairs itself.
cor_test_IMET2 <- cor.test(
  my_data_clean$ME01_sum,
  my_data_clean$BMI_after,
  method = "pearson"
)
print(cor_test_IMET2)
## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$ME01_sum and my_data_clean$BMI_after
## t = 2.7936, df = 344, p-value = 0.005504
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.04420032 0.25044296
## sample estimates:
##      cor 
## 0.148941

Again, we see a similar pattern for the BMI after and the total IMET score.

BMI change vs IMET standard

# Scatter plot of the IMET total score (x) against the BMI change (y); the red
# line is the least-squares fit of BMI_change on ME01_sum. The title names the
# IMET total, which is the variable actually shown on the x axis.
plot(my_data_clean$ME01_sum, my_data_clean$BMI_change,
     main = "Scatter Plot of IMET total vs. BMI Change",
     xlab = "IMET total",
     ylab = "BMI Change",
     col = "blue", pch = 16)
abline(lm(BMI_change ~ ME01_sum, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. cor.test() has no `use` argument (it would be
# silently swallowed by `...`); it always drops incomplete pairs itself.
cor_test_IMET3 <- cor.test(
  my_data_clean$ME01_sum,
  my_data_clean$BMI_change,
  method = "pearson"
)
print(cor_test_IMET3)
## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$ME01_sum and my_data_clean$BMI_change
## t = 0.64114, df = 344, p-value = 0.5219
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.07114663  0.13947408
## sample estimates:
##        cor 
## 0.03454732

But for the BMI change, we do not find any statistically significant correlation with the total IMET score.

# Total FAS score from items FA01_01 to FA01_10. Only base R is used here, so
# this step does not depend on dplyr being attached.

# Step 1: Dynamically find columns matching "FA01_01" to "FA01_10"
variables_to_convert2 <- grep("^FA01_0[1-9]$|^FA01_10$", names(my_data_clean), value = TRUE)

# Step 2: Convert to numeric and replace the -9 code with NA
my_data_clean[variables_to_convert2] <- lapply(
  my_data_clean[variables_to_convert2],
  function(item) {
    score <- as.numeric(as.character(item))
    score[score %in% -9] <- NA
    score
  }
)

# Step 3: Reverse-score questions 4 and 10 (1 <-> 5, 2 <-> 4, 3 stays 3).
# Only values 1 to 5 are reversed; NA and any other value are left unchanged,
# which matches what the previous recode() call did for unmatched values.
reverse_fas_item <- function(item) {
  is_valid <- item %in% 1:5
  item[is_valid] <- 6 - item[is_valid]
  item
}
my_data_clean$FA01_04 <- reverse_fas_item(my_data_clean$FA01_04)
my_data_clean$FA01_10 <- reverse_fas_item(my_data_clean$FA01_10)

# Step 4: Total FAS score, only for participants who answered all questions.
# rowSums() keeps na.rm = FALSE by default, so any unanswered question makes
# the total NA. It also always returns a numeric vector and avoids apply() on
# a data frame.
my_data_clean$FAS_total <- unname(rowSums(my_data_clean[variables_to_convert2]))

FAS

BMI before vs FAS

# Scatter plot of the FAS total score (x) against BMI before (y); the red line
# is the least-squares fit of BMI_before on FAS_total.
plot(my_data_clean$FAS_total, my_data_clean$BMI_before,
     main = "Scatter Plot of FAS total vs. BMI before",
     xlab = "FAS_total",
     ylab = "BMI before",
     col = "blue", pch = 16)
abline(lm(BMI_before ~ FAS_total, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. cor.test() has no `use` argument (it would be
# silently swallowed by `...`); it always drops incomplete pairs itself.
cor_test_FAS_BMIbefore <- cor.test(
  my_data_clean$FAS_total,
  my_data_clean$BMI_before,
  method = "pearson"
)
print(cor_test_FAS_BMIbefore)
## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$FAS_total and my_data_clean$BMI_before
## t = 3.5779, df = 340, p-value = 0.0003966
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.08617858 0.29066652
## sample estimates:
##      cor 
## 0.190488

We found a weak positive relationship between the BMI before Covid and the FAS score (r = 0.190, p = 0.0004). The significant p-value and the 95% CI (0.086, 0.291) confirm a small but positive association.

BMI after vs FAS

# Scatter plot of the FAS total score (x) against BMI after (y); the red line
# is the least-squares fit of BMI_after on FAS_total.
plot(my_data_clean$FAS_total, my_data_clean$BMI_after,
     main = "Scatter Plot of FAS total vs. BMI after",
     xlab = "FAS_total",
     ylab = "BMI after",
     col = "blue", pch = 16)
abline(lm(BMI_after ~ FAS_total, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. cor.test() has no `use` argument (it would be
# silently swallowed by `...`); it always drops incomplete pairs itself.
cor_test_FAS_BMIafter <- cor.test(
  my_data_clean$FAS_total,
  my_data_clean$BMI_after,
  method = "pearson"
)
print(cor_test_FAS_BMIafter)
## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$FAS_total and my_data_clean$BMI_after
## t = 3.7274, df = 339, p-value = 0.0002267
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.09419661 0.29833659
## sample estimates:
##       cor 
## 0.1984175

There is also a weak but statistically significant correlation between the BMI after Covid and the FAS score.

BMI change vs FAS

# Scatter plot of the FAS total score (x) against the BMI change (y); the red
# line is the least-squares fit of BMI_change on FAS_total.
plot(my_data_clean$FAS_total, my_data_clean$BMI_change,
     main = "Scatter Plot of FAS total vs. BMI change",
     xlab = "FAS_total",
     ylab = "BMI change",
     col = "blue", pch = 16)
abline(lm(BMI_change ~ FAS_total, data = my_data_clean), col = "red", lwd = 2)

# Pearson correlation test. cor.test() has no `use` argument (it would be
# silently swallowed by `...`); it always drops incomplete pairs itself.
cor_test_FAS_BMIchange <- cor.test(
  my_data_clean$FAS_total,
  my_data_clean$BMI_change,
  method = "pearson"
)
print(cor_test_FAS_BMIchange)
## 
##  Pearson's product-moment correlation
## 
## data:  my_data_clean$FAS_total and my_data_clean$BMI_change
## t = 1.1044, df = 339, p-value = 0.2702
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.04662708  0.16503184
## sample estimates:
##        cor 
## 0.05987536

There is no statistically significant correlation between the BMI change and the FAS score.

IMET vs EQ5D-VAS current

At the end, I wanted to check whether there is a strong correlation between the IMET total score and the EQ5D-VAS. We expect a strong negative correlation, since higher IMET values indicate a worse health condition and higher EQ5D-VAS values indicate a better health condition.

# Scatter plot of the current EQ5D-VAS score (x) against the IMET total (y);
# the red line is the least-squares fit of ME01_sum on EQ_08_01.
with(
  my_data_clean,
  plot(
    EQ_08_01, ME01_sum,
    pch = 16, col = "blue",
    ylab = "IMET sum",
    xlab = "Gesundheitszustand",
    main = "Scatter Plot of Gesundheitszustand vs. IMET sum"
  )
)
abline(
  reg = lm(ME01_sum ~ EQ_08_01, data = my_data_clean),
  lwd = 2, col = "red"
)

As we can see, the correlation is as expected, so we can continue with our research question about the relation between BMI (before, after and change) and the IMET scale.