Packages loaded: dplyr and car (which also loads carData). dplyr masks a few base/stats functions (filter, lag, intersect, setdiff, setequal, union), and car masks dplyr's recode, which is why recode is namespaced as dplyr::recode below.
Calculating BMI
combined_data_clean$DM21_01 <- as.numeric(as.character(combined_data_clean$DM21_01))
combined_data_clean$DM21_02 <- as.numeric(as.character(combined_data_clean$DM21_02))
combined_data_clean$DM21_03 <- as.numeric(as.character(combined_data_clean$DM21_03))
# Calculate BMI before (based on DM21_01 - weight before and DM21_03 - height)
combined_data_clean$BMI_before <- combined_data_clean$DM21_01 / (combined_data_clean$DM21_03 / 100)^2
# Calculate BMI after (based on DM21_02 - weight after and DM21_03 - height)
combined_data_clean$BMI_after <- combined_data_clean$DM21_02 / (combined_data_clean$DM21_03 / 100)^2
# Keep only participants with a computable BMI_after and a plausible height (DM21_03 > 70 cm)
combined_data_clean <- combined_data_clean[!is.na(combined_data_clean$BMI_after), ]
combined_data_clean <- combined_data_clean[combined_data_clean$DM21_03 > 70, ]
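As a quick sanity check on the formula (weight in kg divided by height in metres squared), a hypothetical participant of 80 kg and 175 cm (values made up for illustration, not taken from the data) should come out at roughly 26.1:
# Hypothetical values: 80 kg, 175 cm
80 / (175 / 100)^2  # ~26.12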
Calculating PROM scores
IMET
# Variables of interest
variables_to_convert <- paste0("ME01_0", 1:8)
# Convert to numeric, replace -9 with NA, and recode values from 1–11 to 0–10
combined_data_clean[variables_to_convert] <- combined_data_clean[variables_to_convert] |>
lapply(function(column) {
numeric_column <- as.numeric(column) # Convert to numeric
numeric_column[numeric_column == -9] <- NA # Replace -9 with NA
numeric_column <- numeric_column - 1 # Recode values: 1–11 to 0–10
return(numeric_column)
}) |>
as.data.frame() # Convert back to data frame
# Create a new variable by summing the values of the selected columns,
# setting NA for participants with any missing values in the selected variables
combined_data_clean$ME01_sum <- apply(combined_data_clean[variables_to_convert], 1, function(row) {
if (any(is.na(row))) {
return(NA) # Set NA if any variable is missing
} else {
return(sum(row)) # Calculate the sum if all variables have values
}
})
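For reference, an equivalent and more compact way to get the same all-or-nothing sum is rowSums() with its default na.rm = FALSE, which already returns NA whenever any item is missing:
# Same result as the apply() above: NA if any ME01 item is missing, otherwise the sum
combined_data_clean$ME01_sum <- rowSums(combined_data_clean[variables_to_convert])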
# Recode the single ME02 item the same way: -9 (no answer) to NA, then shift 1–11 to 0–10
# (ME02_01 is assumed to be numeric or directly coercible here)
combined_data_clean <- combined_data_clean %>%
mutate(
ME02_0_new = ifelse(ME02_01 == -9, NA, as.numeric(ME02_01) - 1)
)
combined_data_clean$IMET_total <- ifelse(is.na(combined_data_clean$ME02_0_new) | is.na(combined_data_clean$ME01_sum), NA, combined_data_clean$ME02_0_new + combined_data_clean$ME01_sum)
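Assuming the IMET total is the eight ME01 items plus the recoded ME02 item, each scored 0–10, it should fall between 0 and 90; a quick range check makes that explicit:
# Sanity check: IMET total should lie in 0–90 (9 items scored 0–10 each)
range(combined_data_clean$IMET_total, na.rm = TRUE)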
FAS
# Step 1: Dynamically find columns matching "FA01_01" to "FA01_10"
variables_to_convert2 <- grep("^FA01_0[1-9]$|^FA01_10$", names(combined_data_clean), value = TRUE)
# Step 2: Convert to numeric and replace -9 with NA
combined_data_clean[variables_to_convert2] <- as.data.frame(
lapply(combined_data_clean[variables_to_convert2], function(column) {
numeric_column <- as.numeric(as.character(column)) # Convert to numeric
numeric_column[numeric_column == -9] <- NA # Replace -9 with NA
return(numeric_column)
})
)
# Step 3: Verify the changes (inspect the selected variables and confirm the numeric conversion)
# Step 4: Reverse-score items 4 and 10
combined_data_clean$FA01_04 <- dplyr::recode(
combined_data_clean$FA01_04,
`1` = 5, `2` = 4, `3` = 3, `4` = 2, `5` = 1,
.default = combined_data_clean$FA01_04
)
combined_data_clean$FA01_10 <- dplyr::recode(
combined_data_clean$FA01_10,
`1` = 5, `2` = 4, `3` = 3, `4` = 2, `5` = 1,
.default = combined_data_clean$FA01_10
)
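Because the items are scored 1–5, the same reverse-scoring can be written arithmetically as 6 minus the original value, which avoids listing every level:
# Equivalent reverse-scoring rule for a 1–5 item: 6 - x (NA stays NA)
6 - c(1, 2, 3, 4, 5, NA)  # 5 4 3 2 1 NA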
# Step 5: Calculate the total FAS score only for participants who answered all ten questions
combined_data_clean$FAS_total <- apply(combined_data_clean[variables_to_convert2], 1, function(row) {
if (any(is.na(row))) {
return(NA) # Set NA if any question is unanswered
} else {
return(sum(row)) # Calculate the sum if all questions are answered
}
})
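With ten items each scored 1–5 after reverse-scoring, the FAS total should lie between 10 and 50; the same kind of range check applies:
# Sanity check: FAS total should lie in 10–50
range(combined_data_clean$FAS_total, na.rm = TRUE)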
EQ-5D-3L
combined_data_clean$EQ05 <- as.numeric(as.character(combined_data_clean$EQ05))
combined_data_clean$EQ06 <- as.numeric(as.character(combined_data_clean$EQ06))
combined_data_clean$EQ_08_01 <- as.numeric(as.character(combined_data_clean$EQ_08_01))
# Replace specific values in EQ05 and EQ06 columns
combined_data_clean <- combined_data_clean %>%
mutate(
EQ05 = ifelse(EQ05 == 4, 2, EQ05), # Replace 4 with 2 in EQ05
EQ06 = ifelse(EQ06 == 4, 2, EQ06) # Replace 4 with 2 in EQ06
)
combined_data_clean$EQ5D3Lcode <- paste0(combined_data_clean$EQ02, combined_data_clean$EQ03, combined_data_clean$EQ04, combined_data_clean$EQ05, combined_data_clean$EQ06)
combined_data_clean$EQ5D_3L <- eq5d(
scores = combined_data_clean$EQ5D3Lcode,
version = "3L",
country = "Germany",
type = "VAS",
ignore.invalid = TRUE # Handle invalid cases gracefully
)
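Because eq5d() is called with ignore.invalid = TRUE (so invalid cases are handled by returning NA, as noted in the comment above), it is worth checking how many of the pasted codes are actually valid five-digit 3L states, i.e. five characters that are each 1, 2 or 3; one way to do this in base R:
# Count health-state codes that are not valid 3L states
valid_code <- grepl("^[1-3]{5}$", combined_data_clean$EQ5D3Lcode)
table(valid_code)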
Now grouping participants into a PCS and a non-PCS group (1 = PCS, 0 = non-PCS)
combined_data_clean <- combined_data_clean %>%
mutate(PCS_binary = case_when(
# Condition 1: participants in dftc3hc or dftc3 with SY29 == 2 are coded 1 (PCS)
((QUESTNNR == "dftc3hc" & SY29 == 2) | (QUESTNNR == "dftc3" & SY29 == 2)) ~ 1,
# Condition 2: participants in dftc3hc with SY29 == 1
# or in dftc3hc with SY18_01 == 2 are coded 0 (non-PCS)
((QUESTNNR == "dftc3hc" & SY29 == 1) | (QUESTNNR == "dftc3hc" & SY18_01 == 2)) ~ 0,
# If none of the conditions are met, return NA
TRUE ~ NA_real_
))
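A quick frequency table (including NAs for participants who meet neither condition) shows the resulting group sizes before any modelling:
# PCS group sizes, including participants assigned to neither group
table(combined_data_clean$PCS_binary, useNA = "ifany")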
# At this point combined_data_clean has:
# - a numeric variable BMI_after
# - a binary variable PCS_binary (1 = PCS, 0 = non-PCS)
# Function to perform the analysis for a given BMI cutoff
analyze_bmi_cutoff <- function(combined_data_clean, cutoff) {
# Create a binary variable: above the cutoff vs. not above it
# (note that "Normal" here just means BMI_after <= cutoff, so it also includes underweight participants)
combined_data_clean <- combined_data_clean %>%
mutate(
overweight_binary = ifelse(BMI_after > cutoff, "Overweight", "Normal")
)
# Create a 2x2 contingency table of PCS vs overweight
table_2x2 <- table(PCS = combined_data_clean$PCS_binary, Overweight = combined_data_clean$overweight_binary)
print(paste("Contingency table for BMI cutoff >", cutoff))
print(table_2x2)
# Use Fisher's exact test to calculate the odds ratio and CI
fisher_result <- fisher.test(table_2x2)
print(fisher_result)
return(fisher_result)
}
# Analyze using a BMI cutoff of >25
result_25 <- analyze_bmi_cutoff(combined_data_clean, cutoff = 25)
## [1] "Contingency table for BMI cutoff > 25"
## Overweight
## PCS Normal Overweight
## 0 55 28
## 1 137 169
##
## Fisher's Exact Test for Count Data
##
## data: table_2x2
## p-value = 0.0005282
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.420281 4.186944
## sample estimates:
## odds ratio
## 2.417585
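As a rough cross-check, the crude (unconditional) odds ratio from the 2x2 table above is (55 * 169) / (28 * 137), about 2.42, very close to the conditional maximum-likelihood estimate of 2.42 that fisher.test() reports:
# Crude odds ratio from the printed table (rows: PCS 0/1, columns: Normal/Overweight)
(55 * 169) / (28 * 137)  # ~2.42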
# Analyze using a BMI cutoff of >30
result_30 <- analyze_bmi_cutoff(combined_data_clean, cutoff = 30)
## [1] "Contingency table for BMI cutoff > 30"
## Overweight
## PCS Normal Overweight
## 0 74 9
## 1 225 81
##
## Fisher's Exact Test for Count Data
##
## data: table_2x2
## p-value = 0.002024
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.389426 7.025487
## sample estimates:
## odds ratio
## 2.953125
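If the odds ratios and confidence intervals are needed programmatically rather than read off the printed output, they can be taken from the htest objects that fisher.test() returns, for example:
# Components of the htest object hold the numbers printed above
result_25$estimate  # conditional MLE of the odds ratio
result_25$conf.int  # 95% confidence interval
result_30$p.value   # two-sided p-value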
# WHO BMI classes (underweight, normal weight, pre-obesity, obesity classes I–III)
combined_data_clean$BMI_cat <- cut(combined_data_clean$BMI_after,
breaks = c(-Inf, 18.5, 24.9, 29.9, 34.9, 39.9, Inf),
labels = c('Underweight',
'Normal weight',
'Pre-obesity',
'Obesity class I',
'Obesity class II',
'Obesity class III'),
include.lowest = TRUE,
right=TRUE)
tab <- table(combined_data_clean$PCS_binary, combined_data_clean$BMI_cat)
print(tab)
##
## Underweight Normal weight Pre-obesity Obesity class I Obesity class II
## 0 4 49 21 7 1
## 1 13 122 90 38 27
##
## Obesity class III
## 0 1
## 1 16
combined_data_clean$BMI_cat <- relevel(combined_data_clean$BMI_cat, ref = "Normal weight")
model <- glm(PCS_binary ~ BMI_cat, data = combined_data_clean, family = binomial(link = "logit"))
summary(model)
##
## Call:
## glm(formula = PCS_binary ~ BMI_cat, family = binomial(link = "logit"),
## data = combined_data_clean)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.9122 0.1691 5.393 6.91e-08 ***
## BMI_catUnderweight 0.2665 0.5963 0.447 0.6550
## BMI_catPre-obesity 0.5431 0.2955 1.838 0.0661 .
## BMI_catObesity class I 0.7795 0.4447 1.753 0.0796 .
## BMI_catObesity class II 2.3836 1.0318 2.310 0.0209 *
## BMI_catObesity class III 1.8604 1.0445 1.781 0.0749 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 403.30 on 388 degrees of freedom
## Residual deviance: 386.23 on 383 degrees of freedom
## (94 observations deleted due to missingness)
## AIC: 398.23
##
## Number of Fisher Scoring iterations: 5
As the number of non-PCS participants in each obesity class is very small (as can be seen above), I grouped the three obesity classes together and refitted the model.
combined_data_clean$BMI_cat_grouped <- cut(combined_data_clean$BMI_after,
breaks = c(-Inf, 18.5, 24.9, 29.9, Inf),
labels = c('Underweight',
'Normal weight',
'Pre-obesity',
'Obesity'),
include.lowest = TRUE,
right = TRUE)
tab_grouped <- table(combined_data_clean$PCS_binary, combined_data_clean$BMI_cat_grouped)
print(tab_grouped)
##
## Underweight Normal weight Pre-obesity Obesity
## 0 4 49 21 9
## 1 13 122 90 81
combined_data_clean$BMI_cat_grouped <- relevel(combined_data_clean$BMI_cat_grouped, ref = "Normal weight")
model_grouped <- glm(PCS_binary ~ BMI_cat_grouped, data = combined_data_clean, family = binomial(link = "logit"))
summary(model_grouped)
##
## Call:
## glm(formula = PCS_binary ~ BMI_cat_grouped, family = binomial(link = "logit"),
## data = combined_data_clean)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.9122 0.1691 5.393 6.91e-08 ***
## BMI_cat_groupedUnderweight 0.2665 0.5963 0.447 0.654965
## BMI_cat_groupedPre-obesity 0.5431 0.2955 1.838 0.066106 .
## BMI_cat_groupedObesity 1.2850 0.3899 3.296 0.000982 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 403.30 on 388 degrees of freedom
## Residual deviance: 389.61 on 385 degrees of freedom
## (94 observations deleted due to missingness)
## AIC: 397.61
##
## Number of Fisher Scoring iterations: 4
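Since the coefficients above are on the log-odds scale, they are easier to read as odds ratios relative to the 'Normal weight' reference group; a minimal sketch using Wald intervals from confint.default():
# Odds ratios and 95% Wald CIs for the grouped model
exp(cbind(OR = coef(model_grouped), confint.default(model_grouped)))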
Binary BMI outcome: 1 = obese (BMI_after >= 30), 0 = non-obese
combined_data_clean$BMI_binary <- ifelse(combined_data_clean$BMI_after >= 30, 1, 0)
# BMI ~ PCS_group + IMET
model1 <- glm(BMI_binary ~ PCS_binary + IMET_total,
data = combined_data_clean,
family = binomial(link = "logit"))
# BMI ~ PCS_group + EQ-5D-3L index score
model2 <- glm(BMI_binary ~ PCS_binary + EQ5D_3L,
data = combined_data_clean,
family = binomial(link = "logit"))
# BMI ~ PCS_group + EQ-5D VAS
model3 <- glm(BMI_binary ~ PCS_binary + EQ_08_01,
data = combined_data_clean,
family = binomial(link = "logit"))
# BMI ~ PCS_group + FAS Score
model4 <- glm(BMI_binary ~ PCS_binary + FAS_total,
data = combined_data_clean,
family = binomial(link = "logit"))
# BMI ~ PCS_group
model10 <- glm(BMI_binary ~ PCS_binary ,
data = combined_data_clean,
family = binomial(link = "logit"))
summary(model10)
##
## Call:
## glm(formula = BMI_binary ~ PCS_binary, family = binomial(link = "logit"),
## data = combined_data_clean)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.107 0.353 -5.968 2.4e-09 ***
## PCS_binary 1.085 0.376 2.886 0.0039 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 420.83 on 388 degrees of freedom
## Residual deviance: 410.66 on 387 degrees of freedom
## (94 observations deleted due to missingness)
## AIC: 414.66
##
## Number of Fisher Scoring iterations: 4
summary(model1)
##
## Call:
## glm(formula = BMI_binary ~ PCS_binary + IMET_total, family = binomial(link = "logit"),
## data = combined_data_clean)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.417234 0.388212 -6.227 4.77e-10 ***
## PCS_binary 0.860822 0.434426 1.982 0.0475 *
## IMET_total 0.012546 0.006166 2.035 0.0419 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 393.27 on 363 degrees of freedom
## Residual deviance: 376.93 on 361 degrees of freedom
## (119 observations deleted due to missingness)
## AIC: 382.93
##
## Number of Fisher Scoring iterations: 4
summary(model2)
##
## Call:
## glm(formula = BMI_binary ~ PCS_binary + EQ5D_3L, family = binomial(link = "logit"),
## data = combined_data_clean)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.3028 0.5542 -2.351 0.0187 *
## PCS_binary 0.7986 0.4034 1.980 0.0477 *
## EQ5D_3L -0.9282 0.5016 -1.850 0.0642 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 405.92 on 378 degrees of freedom
## Residual deviance: 392.92 on 376 degrees of freedom
## (104 observations deleted due to missingness)
## AIC: 398.92
##
## Number of Fisher Scoring iterations: 4
summary(model3)
##
## Call:
## glm(formula = BMI_binary ~ PCS_binary + EQ_08_01, family = binomial(link = "logit"),
## data = combined_data_clean)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.77754 0.59215 -1.313 0.18915
## PCS_binary 0.47230 0.43662 1.082 0.27937
## EQ_08_01 -0.01734 0.00644 -2.693 0.00708 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 403.85 on 374 degrees of freedom
## Residual deviance: 386.72 on 372 degrees of freedom
## (108 observations deleted due to missingness)
## AIC: 392.72
##
## Number of Fisher Scoring iterations: 4
summary(model4)
##
## Call:
## glm(formula = BMI_binary ~ PCS_binary + FAS_total, family = binomial(link = "logit"),
## data = combined_data_clean)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.21220 0.53884 -5.961 2.5e-09 ***
## PCS_binary 0.46284 0.42723 1.083 0.27865
## FAS_total 0.04822 0.01647 2.927 0.00342 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 396.90 on 370 degrees of freedom
## Residual deviance: 378.43 on 368 degrees of freedom
## (112 observations deleted due to missingness)
## AIC: 384.43
##
## Number of Fisher Scoring iterations: 4
vif_values <- vif(model1)
print(vif_values)
## PCS_binary IMET_total
## 1.197139 1.197139
vif_values <- vif(model2)
print(vif_values)
## PCS_binary EQ5D_3L
## 1.138709 1.138709
vif_values <- vif(model3)
print(vif_values)
## PCS_binary EQ_08_01
## 1.318723 1.318723
vif_values <- vif(model4)
print(vif_values)
## PCS_binary FAS_total
## 1.248504 1.248504
So there is no important collinearity between the predictors (all VIFs are well below the common thresholds of 5 or 10).
# BMI ~ PCS_group * IMET
model5 <- glm(BMI_binary ~ PCS_binary * IMET_total,
data = combined_data_clean,
family = binomial(link = "logit"))
# BMI ~ PCS_group * EQ-5D-3L index score
model6 <- glm(BMI_binary ~ PCS_binary * EQ5D_3L,
data = combined_data_clean,
family = binomial(link = "logit"))
# BMI ~ PCS_group * EQ-5D VAS
model7 <- glm(BMI_binary ~ PCS_binary * EQ_08_01,
data = combined_data_clean,
family = binomial(link = "logit"))
# BMI ~ PCS_group * FAS Score
model8 <- glm(BMI_binary ~ PCS_binary * FAS_total,
data = combined_data_clean,
family = binomial(link = "logit"))
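Whether an interaction term improves on the corresponding main-effects model can also be tested with a likelihood-ratio test, since each pair of models is nested and, using the same variables, fitted to the same complete cases; for example, for the IMET pair (and analogously for the others):
# Likelihood-ratio test: does the PCS x IMET interaction improve on the main-effects model?
anova(model1, model5, test = "Chisq")
Given the reported residual deviances (376.93 vs 376.91 with one extra parameter), this difference is clearly not significant, which matches the non-significant interaction coefficient below.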
summary(model5)
##
## Call:
## glm(formula = BMI_binary ~ PCS_binary * IMET_total, family = binomial(link = "logit"),
## data = combined_data_clean)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.380552 0.475925 -5.002 5.68e-07 ***
## PCS_binary 0.810198 0.580135 1.397 0.163
## IMET_total 0.010376 0.017852 0.581 0.561
## PCS_binary:IMET_total 0.002474 0.019034 0.130 0.897
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 393.27 on 363 degrees of freedom
## Residual deviance: 376.91 on 360 degrees of freedom
## (119 observations deleted due to missingness)
## AIC: 384.91
##
## Number of Fisher Scoring iterations: 4
summary(model6)
##
## Call:
## glm(formula = BMI_binary ~ PCS_binary * EQ5D_3L, family = binomial(link = "logit"),
## data = combined_data_clean)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.8354 1.5255 -1.203 0.229
## PCS_binary 1.3684 1.5606 0.877 0.381
## EQ5D_3L -0.3097 1.7028 -0.182 0.856
## PCS_binary:EQ5D_3L -0.6839 1.7833 -0.384 0.701
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 405.92 on 378 degrees of freedom
## Residual deviance: 392.76 on 375 degrees of freedom
## (104 observations deleted due to missingness)
## AIC: 400.76
##
## Number of Fisher Scoring iterations: 4
summary(model7)
##
## Call:
## glm(formula = BMI_binary ~ PCS_binary * EQ_08_01, family = binomial(link = "logit"),
## data = combined_data_clean)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.59937 1.19992 0.500 0.6174
## PCS_binary -1.04263 1.23918 -0.841 0.4001
## EQ_08_01 -0.03670 0.01652 -2.222 0.0263 *
## PCS_binary:EQ_08_01 0.02276 0.01787 1.273 0.2029
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 403.85 on 374 degrees of freedom
## Residual deviance: 385.08 on 371 degrees of freedom
## (108 observations deleted due to missingness)
## AIC: 393.08
##
## Number of Fisher Scoring iterations: 5
summary(model8)
##
## Call:
## glm(formula = BMI_binary ~ PCS_binary * FAS_total, family = binomial(link = "logit"),
## data = combined_data_clean)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.147836 0.935230 -3.366 0.000763 ***
## PCS_binary 0.372310 1.159726 0.321 0.748185
## FAS_total 0.045577 0.035564 1.282 0.199998
## PCS_binary:FAS_total 0.003364 0.040151 0.084 0.933227
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 396.90 on 370 degrees of freedom
## Residual deviance: 378.42 on 367 degrees of freedom
## (112 observations deleted due to missingness)
## AIC: 386.42
##
## Number of Fisher Scoring iterations: 4