1. General description

This notebook contains the statistical analyses for the SPPT dataset.

It takes as input the following files: brain/df_betas_repo.csv (trial-level beta estimates per VOI), the NNIPanswers.txt files in task-answers/ (case, choice, and confidence ratings per subject), Stock_metrics.xlsx (stock metrics per investment case), and Survey_scores.csv (survey scores per participant).

For additional details, we refer to the README.docx file in the repository and to the paper.

2. Import packages and load data

Import packages:

library(readr)
library(tidyverse)
library(tibble)
library(dplyr)
library(readxl)
library(jtools)
library(psych)
library(ggplot2)
library(rstatix)
library(huxtable)
library(caret)

Load beta estimates:

# Load brain data
df_betas <- read_csv("brain/df_betas_repo.csv",col_types = cols())

# Subjects as factor
df_betas$ID <- factor(df_betas$ID)

# Get block + case from trial string
df_betas <- add_column(df_betas, block = sapply(strsplit(as.character(df_betas$case), "\\Case"), `[`, 1))
df_betas$case <- sapply(strsplit(as.character(df_betas$case), "\\Case"), `[`, 2)


# Remove case 51 because of an error with stimulus presentation for this case
remove_case <- c(51)
df_betas <- df_betas[!df_betas$case %in% remove_case, ]
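
As a quick check (optional, not part of the original pipeline), 44 unique cases should remain after dropping case 51:

length(unique(df_betas$case))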

Center estimates per participant per VOI:

center <- function(x, na.rm = TRUE) (x - mean(x, na.rm = na.rm))

df_betas_centered <- df_betas %>% 
  group_by(ID) %>% 
   mutate_at(vars(matches('mask')), center)
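
A quick sanity check (a sketch, not part of the original pipeline): df_betas_centered is still grouped by ID, so the per-participant means of the centered VOI columns should be approximately zero.

df_betas_centered %>%
  summarise(across(matches('mask'), ~ mean(.x, na.rm = TRUE)))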

Add the behavioral answers (case, choice, and stars, i.e. the confidence rating):

# Load and concatenate data
ff <- list.files("task-answers", pattern = "NNIPanswers.txt", recursive = TRUE, full.names = TRUE)
star_data <- do.call(rbind, lapply(ff, read.delim))

# Add subject number
sub_nr <- rep(2:37, each=45)
sub_nr <- sprintf("%02d", sub_nr)
ID <- paste("sub-",sub_nr, sep="")
star_data <- cbind(ID,star_data)

# Rename and fix columns
names(star_data)[names(star_data) == "Case"] <- "case"
star_data <- star_data[c("ID","case","Stars")]
star_data <- star_data %>% mutate(case = as.character(case))

# Combine data frames
all_data_df <- star_data %>% right_join(df_betas_centered, by=c("ID","case"))
all_data_df <- all_data_df %>% relocate(Stars, .after=last_col())

Add stock metrics:

# Add case info
Stock_metrics <- read_excel("Stock_metrics.xlsx")

# Convert case to factor
Stock_metrics$case <- factor(Stock_metrics$case)

# Match by case
all_data_df <- cbind(all_data_df, Stock_metrics[match(all_data_df$case, Stock_metrics$case),])
all_data_df <- all_data_df[, !duplicated(colnames(all_data_df))]

# Log-transform CMC because it is highly skewed
all_data_df$log_CMC <- log(all_data_df$CMC)

# Add correct
all_data_df$cor_p1Y <- ifelse(all_data_df$choice==all_data_df$Rel_p1Y_pos, 1, 0) #Correct prediction at 1-year in the future (1 if equal, 0 if not)
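
Optionally, the skewness before and after the log transform can be checked with skew() from the already-loaded psych package (a sketch, not part of the original output):

skew(all_data_df$CMC)
skew(all_data_df$log_CMC)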

Add survey scores (this step also removes subjects 17 and 18, who are excluded because of excessive head motion):

# Import data
survey_scores <- read_csv("Survey_scores.csv",col_types = cols())

# Join to all_data
all_data_df <- merge(all_data_df,survey_scores, by="ID")
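
A quick check of which subjects are dropped by the merge (a sketch; it assumes sub-17 and sub-18 are simply absent from Survey_scores.csv):

setdiff(unique(as.character(df_betas$ID)), unique(survey_scores$ID))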

Make column names syntactically valid and unique:

names(all_data_df)<-make.names(names(all_data_df),unique = TRUE)

3. Descriptive analyses: Participants

Calculate average per participant

Sub_mean <- all_data_df %>%
  group_by(ID) %>%
  summarise_at(vars(-case,-block,-trial), list(~ mean(., na.rm=TRUE)))

#Sub_mean

Calculate age and years of experience:

x=Sub_mean[50:53]
sapply(x, function(x) c( "Stand dev" = sd(x), 
                         "Mean"= mean(x,na.rm=TRUE),
                         "n" = length(x),
                         "Median" = median(x),
                         "CoeffofVariation" = sd(x)/mean(x,na.rm=TRUE),
                         "Minimum" = min(x),
                         "Maximun" = max(x),
                         "Upper Quantile" = quantile(x,1),
                         "LowerQuartile" = quantile(x,0)
                    )
)
                           Age finance_exp  asset_exp equity_exp
Stand dev            9.0894250   9.9644757  9.3608702   9.552195
Mean                47.5588235  19.2205882 12.3676471  14.926471
n                   34.0000000  34.0000000 34.0000000  34.000000
Median              49.0000000  20.0000000 10.0000000  13.500000
CoeffofVariation     0.1911196   0.5184272  0.7568837   0.639950
Minimum             29.0000000   0.0000000  0.0000000   0.000000
Maximum             66.0000000  40.0000000 40.0000000  40.000000
Upper Quantile.100% 66.0000000  40.0000000 40.0000000  40.000000
LowerQuartile.0%    29.0000000   0.0000000  0.0000000   0.000000
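
Since the psych package is already loaded, describe() gives a comparable per-variable summary in a single call (optional alternative, output not shown and not part of the original analysis):

describe(Sub_mean[50:53])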

Calculate self-reported difficulty/realism and test against the mid-point of the scale (mu = 4):

x=Sub_mean[55:56]
sapply(x, function(x) c( "Stand dev" = sd(x), 
                         "Mean"= mean(x,na.rm=TRUE),
                         "n" = length(x),
                         "Median" = median(x),
                         "CoeffofVariation" = sd(x)/mean(x,na.rm=TRUE),
                         "Minimum" = min(x),
                         "Maximun" = max(x),
                         "Upper Quantile" = quantile(x,1),
                         "LowerQuartile" = quantile(x,0)
                    )
)
                     realistic  difficult
Stand dev            1.4129526  0.9883456
Mean                 4.0588235  4.4117647
n                   34.0000000 34.0000000
Median               4.0000000  5.0000000
CoeffofVariation     0.3481187  0.2240250
Minimum              1.0000000  2.0000000
Maximum              7.0000000  6.0000000
Upper Quantile.100%  7.0000000  6.0000000
LowerQuartile.0%     1.0000000  2.0000000
t.test(Sub_mean$realistic, mu=4, alternative="two.sided")

    One Sample t-test

data:  Sub_mean$realistic
t = 0.24275, df = 33, p-value = 0.8097
alternative hypothesis: true mean is not equal to 4
95 percent confidence interval:
 3.565821 4.551826
sample estimates:
mean of x 
 4.058824 
t.test(Sub_mean$difficult, mu=4, alternative="two.sided")

    One Sample t-test

data:  Sub_mean$difficult
t = 2.4293, df = 33, p-value = 0.02074
alternative hypothesis: true mean is not equal to 4
95 percent confidence interval:
 4.066915 4.756615
sample estimates:
mean of x 
 4.411765 

Self-reported importance of information screens:

x=Sub_mean[61:65]
sapply(x, function(x) c( "Stand dev" = sd(x), 
                         "Mean"= mean(x,na.rm=TRUE),
                         "n" = length(x),
                         "Median" = median(x),
                         "CoeffofVariation" = sd(x)/mean(x,na.rm=TRUE),
                         "Minimum" = min(x),
                         "Maximun" = max(x),
                         "Upper Quantile" = quantile(x,1),
                         "LowerQuartile" = quantile(x,0)
                    )
)
                     prof_imp   fund_imp   rela_imp  graph_imp   news_imp
Stand dev            1.225109  1.0294245  1.1930428  0.9650764  1.2878840
Mean                 1.882353  3.9705882  2.9705882  2.9117647  3.9117647
n                   34.000000 34.0000000 34.0000000 34.0000000 34.0000000
Median               1.000000  4.0000000  3.0000000  3.0000000  4.0000000
CoeffofVariation     0.650839  0.2592625  0.4016184  0.3314404  0.3292335
Minimum              1.000000  2.0000000  1.0000000  1.0000000  1.0000000
Maximum              5.000000  5.0000000  5.0000000  5.0000000  5.0000000
Upper Quantile.100%  5.000000  5.0000000  5.0000000  5.0000000  5.0000000
LowerQuartile.0%     1.000000  2.0000000  1.0000000  1.0000000  1.0000000

Reshape data and conduct pairwise tests:

x=Sub_mean[c(1,61:65)]
importance_scores <- pivot_longer(x, cols=-ID,names_to='Screen',values_to='Importance')

pwc <- importance_scores %>%
  pairwise_t_test(
    Importance ~ Screen, paired = TRUE,
    p.adjust.method = "bonferroni"
    )
pwc

4. Descriptive analyses: Investment cases

Calculate average per case

Case_mean <- all_data_df %>%
  group_by(case) %>%
  summarise_at(vars(-ID,-block,-trial), list(~ mean(., na.rm=TRUE)))

Case_mean

Average prediction and correct prediction

x=Case_mean[c(32,49)]
sapply(x, function(x) c( "Stand dev" = sd(x), 
                         "Mean"= mean(x,na.rm=TRUE),
                         "n" = length(x),
                         "Median" = median(x),
                         "CoeffofVariation" = sd(x)/mean(x,na.rm=TRUE),
                         "Minimum" = min(x),
                         "Maximun" = max(x),
                         "Upper Quantile" = quantile(x,1),
                         "LowerQuartile" = quantile(x,0)
                    )
)
                         choice     cor_p1Y
Stand dev            0.24337304  0.24196615
Mean                 0.49547657  0.52622524
n                   44.00000000 44.00000000
Median               0.52941176  0.56684492
CoeffofVariation     0.49118981  0.45981479
Minimum              0.05882353  0.05882353
Maximum              0.88235294  0.88235294
Upper Quantile.100%  0.88235294  0.88235294
LowerQuartile.0%     0.05882353  0.05882353

5. Statistical analyses: Behavior

Logistic regression: relating participants' predictions to future market performance

Rel_p1Y_pos = Relative performance 1 year in the future (underperforming/overperforming, coded as 0/1)

formula=paste("Rel_p1Y_pos~choice",sep="")
print(formula)
[1] "Rel_p1Y_pos~choice"
out_choice <- glm(formula,data=Case_mean,family = binomial(link = logit))

print(summ(out_choice,digits = getOption("jtools-digits", default = 3), scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(1) = 0.525, p = 0.469
Pseudo-R² (Cragg-Uhler) = 0.016
Pseudo-R² (McFadden) = 0.009
AIC = 64.472, BIC = 68.041 

Standard errors: MLE
---------------------------------------------------
                      Est.    S.E.   z val.       p
----------------- -------- ------- -------- -------
(Intercept)         -0.000   0.303   -0.000   1.000
choice               0.222   0.308    0.720   0.471
---------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.

Correlation between self-reported confidence (Stars) and prediction accuracy

cor.test(Case_mean$Stars, Case_mean$cor_p1Y)

    Pearson's product-moment correlation

data:  Case_mean$Stars and Case_mean$cor_p1Y
t = 0.15821, df = 42, p-value = 0.875
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.2744637  0.3189745
sample estimates:
       cor 
0.02440524 

6. Factor analysis - Brain activity

Select the NAcc columns (one per information screen):

nacc <- Case_mean[, c(4,9,14,19,24)]
KMO(r=cor(nacc))
Kaiser-Meyer-Olkin factor adequacy
Call: KMO(r = cor(nacc))
Overall MSA =  0.74
MSA for each item = 
 fund.Bilat_NAcc_mask graph.Bilat_NAcc_mask  news.Bilat_NAcc_mask  prof.Bilat_NAcc_mask  rela.Bilat_NAcc_mask 
                 0.73                  0.80                  0.70                  0.69                  0.77 

Positive correlation across information screens:

median(cor(nacc))
[1] 0.5581268
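
Note that median(cor(nacc)) is taken over the full correlation matrix, including the unit diagonal; if only the pairwise correlations are of interest, one variant (an optional check, not part of the original analysis) is:

median(cor(nacc)[upper.tri(cor(nacc))])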

Factor analysis:

fit <- factanal(nacc, 2,rotation = "promax", scores='regression')

print(fit, digits=3, cutoff=0.3, sort=TRUE)

Call:
factanal(x = nacc, factors = 2, scores = "regression", rotation = "promax")

Uniquenesses:
 fund.Bilat_NAcc_mask graph.Bilat_NAcc_mask  news.Bilat_NAcc_mask  prof.Bilat_NAcc_mask  rela.Bilat_NAcc_mask 
                0.172                 0.371                 0.396                 0.388                 0.386 

Loadings:
                      Factor1 Factor2
fund.Bilat_NAcc_mask   0.668   0.381 
graph.Bilat_NAcc_mask  0.675         
prof.Bilat_NAcc_mask   0.865         
news.Bilat_NAcc_mask           0.831 
rela.Bilat_NAcc_mask           0.702 

               Factor1 Factor2
SS loadings      1.692   1.432
Proportion Var   0.338   0.286
Cumulative Var   0.338   0.625

Factor Correlations:
        Factor1 Factor2
Factor1   1.000   0.463
Factor2   0.463   1.000

Test of the hypothesis that 2 factors are sufficient.
The chi square statistic is 0.36 on 1 degree of freedom.
The p-value is 0.549 

Add factor scores, creating df for regression:

NAcc_factor_scores <- data.frame(fit$scores)
reg_df <- merge(Case_mean,NAcc_factor_scores,by="row.names",all.x=TRUE)
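
The merge by row names assumes the factor scores line up one-to-one with the rows of Case_mean; a minimal check (optional) is:

stopifnot(nrow(NAcc_factor_scores) == nrow(Case_mean))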

7. Regression analysis: Information screens

Regression analysis per information screen (for Table 2):

formula=paste("Rel_p1Y_pos~log_CMC+prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~log_CMC+prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask"
out_prof <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(4) = 10.54, p = 0.03
Pseudo-R² (Cragg-Uhler) = 0.28
Pseudo-R² (McFadden) = 0.17
AIC = 60.45, BIC = 69.37 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
-------------------------- ------- ------ -------- ------
(Intercept)                   0.01   0.34     0.03   0.98
log_CMC                       0.31   0.35     0.88   0.38
prof.Bilat_NAcc_mask          1.36   0.62     2.20   0.03
prof.Bilat_MPFC_mask          0.28   0.61     0.47   0.64
prof.Bilat_AIns_mask         -0.75   0.55    -1.37   0.17
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~Rel_m3Y_pos+graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~Rel_m3Y_pos+graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask"
out_graph <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(4) = 12.72, p = 0.01
Pseudo-R² (Cragg-Uhler) = 0.33
Pseudo-R² (McFadden) = 0.21
AIC = 58.28, BIC = 67.20 

Standard errors: MLE
----------------------------------------------------------
                               Est.   S.E.   z val.      p
--------------------------- ------- ------ -------- ------
(Intercept)                   -0.70   0.64    -1.09   0.28
Rel_m3Y_pos                    1.06   0.85     1.25   0.21
graph.Bilat_NAcc_mask          2.14   0.86     2.47   0.01
graph.Bilat_MPFC_mask         -0.83   0.59    -1.41   0.16
graph.Bilat_AIns_mask         -0.30   0.75    -0.39   0.69
----------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~f_s+f_ebit+f_ebitmargin+f_roe+f_roic+fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~f_s+f_ebit+f_ebitmargin+f_roe+f_roic+fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask"
out_fund <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(8) = 6.98, p = 0.54
Pseudo-R² (Cragg-Uhler) = 0.20
Pseudo-R² (McFadden) = 0.11
AIC = 72.01, BIC = 88.07 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
-------------------------- ------- ------ -------- ------
(Intercept)                  -0.00   0.33    -0.01   0.99
f_s                          -0.29   0.47    -0.62   0.54
f_ebit                        0.06   0.43     0.13   0.89
f_ebitmargin                  0.26   0.49     0.52   0.60
f_roe                        -0.29   0.73    -0.40   0.69
f_roic                       -0.15   0.59    -0.25   0.80
fund.Bilat_NAcc_mask          1.46   0.68     2.13   0.03
fund.Bilat_MPFC_mask          0.11   0.51     0.21   0.83
fund.Bilat_AIns_mask         -1.23   0.77    -1.61   0.11
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~rv_evs+rv_ebitda+rv_pe+rv_pb+rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~rv_evs+rv_ebitda+rv_pe+rv_pb+rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask"
out_rela <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(7) = 8.35, p = 0.30
Pseudo-R² (Cragg-Uhler) = 0.23
Pseudo-R² (McFadden) = 0.14
AIC = 68.65, BIC = 82.92 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
-------------------------- ------- ------ -------- ------
(Intercept)                   0.08   0.45     0.19   0.85
rv_evs                        0.27   0.65     0.41   0.68
rv_ebitda                     1.05   0.70     1.51   0.13
rv_pe                        -0.81   0.70    -1.16   0.25
rv_pb                         0.78   1.96     0.40   0.69
rela.Bilat_NAcc_mask          0.65   0.57     1.14   0.26
rela.Bilat_MPFC_mask          0.42   0.50     0.85   0.40
rela.Bilat_AIns_mask         -0.86   0.58    -1.47   0.14
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~news_incgr+news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~news_incgr+news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask"
out_news <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(4) = 4.10, p = 0.39
Pseudo-R² (Cragg-Uhler) = 0.12
Pseudo-R² (McFadden) = 0.07
AIC = 66.90, BIC = 75.82 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
-------------------------- ------- ------ -------- ------
(Intercept)                   0.14   0.39     0.37   0.71
news_incgr                    1.16   1.48     0.78   0.43
news.Bilat_NAcc_mask         -0.34   0.49    -0.71   0.48
news.Bilat_MPFC_mask          0.24   0.49     0.49   0.62
news.Bilat_AIns_mask          0.41   0.50     0.82   0.41
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.

Output table:

coef_names <- c("Current Market Capitalization" = "log_CMC", "Profile - Nacc" = "prof.Bilat_NAcc_mask", "Profile - MPFC" = "prof.Bilat_MPFC_mask", "Profile - AIns" = "prof.Bilat_AIns_mask",
                "36M Performance"='Rel_m3Y_pos', "Graph - Nacc" = "graph.Bilat_NAcc_mask", "Graph - MPFC" = "graph.Bilat_MPFC_mask", "Graph - AIns" = "graph.Bilat_AIns_mask",
                "Sales" ="f_s", "EBIT" = "f_ebit", "EBIT Margin (%)" = "f_ebitmargin","ROE (%)" = "f_roe","ROIC (%)" = "f_roic",
                "Fundamentals - Nacc" = "fund.Bilat_NAcc_mask", "Fundamentals - MPFC" = "fund.Bilat_MPFC_mask", "Fundamentals - AIns" = "fund.Bilat_AIns_mask",
                "EV/Sales" = "rv_evs","EV/EBITDA" = "rv_ebitda","P/E" = "rv_pe","P/B" = "rv_pb",
                "Relative Valuation - Nacc" = "rela.Bilat_NAcc_mask", "Relative Valuation - MPFC" = "rela.Bilat_MPFC_mask", "Relative Valuation - AIns" = "rela.Bilat_AIns_mask",
                "Income growth" = "news_incgr","News item - Nacc" = "news.Bilat_NAcc_mask", "News item - MPFC" = "news.Bilat_MPFC_mask", "News item - AIns" = "news.Bilat_AIns_mask" )

# Uncomment to create table
# quick_docx(export_summs(out_prof,out_graph,out_fund,out_rela,out_news, scale = TRUE,coefs = coef_names,error_pos = c("same"),
#              model.names=c("Company Profile","Price Graph","Fundamentals","Relative Valuation","News Item"),
#              statistics= c(N = "nobs", AIC = "AIC", "Pseudo R2 (McFadden)" = "pseudo.r.squared.mcfadden")), file='Table-2.docx')

Only neural predictors:

formula=paste("Rel_p1Y_pos~prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask"
out_prof_neural <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_neural,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(3) = 9.76, p = 0.02
Pseudo-R² (Cragg-Uhler) = 0.27
Pseudo-R² (McFadden) = 0.16
AIC = 59.24, BIC = 66.38 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
-------------------------- ------- ------ -------- ------
(Intercept)                   0.01   0.34     0.02   0.98
prof.Bilat_NAcc_mask          1.37   0.61     2.23   0.03
prof.Bilat_MPFC_mask          0.16   0.59     0.27   0.79
prof.Bilat_AIns_mask         -0.68   0.54    -1.26   0.21
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask"
out_graph_neural <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_neural,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(3) = 11.06, p = 0.01
Pseudo-R² (Cragg-Uhler) = 0.30
Pseudo-R² (McFadden) = 0.18
AIC = 57.94, BIC = 65.07 

Standard errors: MLE
----------------------------------------------------------
                               Est.   S.E.   z val.      p
--------------------------- ------- ------ -------- ------
(Intercept)                   -0.03   0.34    -0.10   0.92
graph.Bilat_NAcc_mask          1.80   0.77     2.32   0.02
graph.Bilat_MPFC_mask         -0.89   0.58    -1.53   0.13
graph.Bilat_AIns_mask         -0.19   0.72    -0.26   0.80
----------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask"
out_fund_neural <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_neural,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(3) = 5.55, p = 0.14
Pseudo-R² (Cragg-Uhler) = 0.16
Pseudo-R² (McFadden) = 0.09
AIC = 63.45, BIC = 70.59 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
-------------------------- ------- ------ -------- ------
(Intercept)                  -0.01   0.32    -0.03   0.98
fund.Bilat_NAcc_mask          1.38   0.65     2.12   0.03
fund.Bilat_MPFC_mask          0.17   0.49     0.34   0.73
fund.Bilat_AIns_mask         -1.11   0.73    -1.52   0.13
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask"
out_rela_neural <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_neural,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(3) = 3.93, p = 0.27
Pseudo-R² (Cragg-Uhler) = 0.11
Pseudo-R² (McFadden) = 0.06
AIC = 65.06, BIC = 72.20 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
-------------------------- ------- ------ -------- ------
(Intercept)                  -0.00   0.32    -0.02   0.99
rela.Bilat_NAcc_mask          0.79   0.56     1.40   0.16
rela.Bilat_MPFC_mask          0.37   0.48     0.76   0.45
rela.Bilat_AIns_mask         -0.87   0.57    -1.53   0.12
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask"
out_news_neural <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_neural,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(3) = 2.56, p = 0.46
Pseudo-R² (Cragg-Uhler) = 0.08
Pseudo-R² (McFadden) = 0.04
AIC = 66.43, BIC = 73.57 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
-------------------------- ------- ------ -------- ------
(Intercept)                  -0.00   0.31    -0.01   0.99
news.Bilat_NAcc_mask         -0.41   0.48    -0.85   0.39
news.Bilat_MPFC_mask          0.31   0.46     0.67   0.50
news.Bilat_AIns_mask          0.51   0.47     1.10   0.27
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.

Reduced models with only NAcc predictors (without MPFC and AIns) and stock metrics for each screen:

formula=paste("Rel_p1Y_pos~log_CMC+prof.Bilat_NAcc_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~log_CMC+prof.Bilat_NAcc_mask"
out_prof_NAcc <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_NAcc,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(2) = 8.57, p = 0.01
Pseudo-R² (Cragg-Uhler) = 0.24
Pseudo-R² (McFadden) = 0.14
AIC = 58.42, BIC = 63.78 

Standard errors: MLE
--------------------------------------------------------
                             Est.   S.E.   z val.      p
-------------------------- ------ ------ -------- ------
(Intercept)                  0.02   0.33     0.05   0.96
log_CMC                      0.26   0.33     0.77   0.44
prof.Bilat_NAcc_mask         0.98   0.38     2.59   0.01
--------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~Rel_m3Y_pos+graph.Bilat_NAcc_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~Rel_m3Y_pos+graph.Bilat_NAcc_mask"
out_graph_NAcc <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_NAcc,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(2) = 9.32, p = 0.01
Pseudo-R² (Cragg-Uhler) = 0.25
Pseudo-R² (McFadden) = 0.15
AIC = 57.68, BIC = 63.03 

Standard errors: MLE
----------------------------------------------------------
                               Est.   S.E.   z val.      p
--------------------------- ------- ------ -------- ------
(Intercept)                   -0.63   0.59    -1.06   0.29
Rel_m3Y_pos                    1.08   0.80     1.34   0.18
graph.Bilat_NAcc_mask          1.23   0.47     2.59   0.01
----------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~f_s+f_ebit+f_ebitmargin+f_roe+f_roic+fund.Bilat_NAcc_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~f_s+f_ebit+f_ebitmargin+f_roe+f_roic+fund.Bilat_NAcc_mask"
out_fund_NAcc <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_NAcc,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(6) = 3.72, p = 0.71
Pseudo-R² (Cragg-Uhler) = 0.11
Pseudo-R² (McFadden) = 0.06
AIC = 71.27, BIC = 83.76 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
-------------------------- ------- ------ -------- ------
(Intercept)                   0.00   0.32     0.01   0.99
f_s                          -0.11   0.45    -0.24   0.81
f_ebit                       -0.09   0.41    -0.22   0.82
f_ebitmargin                  0.31   0.47     0.66   0.51
f_roe                        -0.13   0.69    -0.19   0.85
f_roic                       -0.24   0.63    -0.38   0.71
fund.Bilat_NAcc_mask          0.50   0.36     1.40   0.16
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~rv_evs+rv_ebitda+rv_pe+rv_pb+rela.Bilat_NAcc_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~rv_evs+rv_ebitda+rv_pe+rv_pb+rela.Bilat_NAcc_mask"
out_rela_NAcc <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_NAcc,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(5) = 5.87, p = 0.32
Pseudo-R² (Cragg-Uhler) = 0.17
Pseudo-R² (McFadden) = 0.10
AIC = 67.13, BIC = 77.84 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
-------------------------- ------- ------ -------- ------
(Intercept)                   0.11   0.49     0.23   0.82
rv_evs                        0.16   0.68     0.23   0.82
rv_ebitda                     0.99   0.65     1.52   0.13
rv_pe                        -0.81   0.64    -1.25   0.21
rv_pb                         0.93   2.38     0.39   0.70
rela.Bilat_NAcc_mask          0.27   0.35     0.78   0.43
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~news_incgr+news.Bilat_NAcc_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~news_incgr+news.Bilat_NAcc_mask"
out_news_NAcc <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_NAcc,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(2) = 2.77, p = 0.25
Pseudo-R² (Cragg-Uhler) = 0.08
Pseudo-R² (McFadden) = 0.05
AIC = 64.23, BIC = 69.58 

Standard errors: MLE
--------------------------------------------------------
                             Est.   S.E.   z val.      p
-------------------------- ------ ------ -------- ------
(Intercept)                  0.17   0.38     0.45   0.65
news_incgr                   1.45   1.40     1.03   0.30
news.Bilat_NAcc_mask         0.06   0.32     0.19   0.85
--------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.

Reduced models with only NAcc predictors (without MPFC and AIns) and without stock metrics for each screen:

formula=paste("Rel_p1Y_pos~prof.Bilat_NAcc_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~prof.Bilat_NAcc_mask"
out_prof_NAcc2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_NAcc2,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(1) = 7.98, p = 0.00
Pseudo-R² (Cragg-Uhler) = 0.22
Pseudo-R² (McFadden) = 0.13
AIC = 57.02, BIC = 60.59 

Standard errors: MLE
--------------------------------------------------------
                             Est.   S.E.   z val.      p
-------------------------- ------ ------ -------- ------
(Intercept)                  0.01   0.33     0.02   0.99
prof.Bilat_NAcc_mask         0.95   0.37     2.56   0.01
--------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~graph.Bilat_NAcc_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~graph.Bilat_NAcc_mask"
out_graph_NAcc2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_NAcc2,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(1) = 7.37, p = 0.01
Pseudo-R² (Cragg-Uhler) = 0.21
Pseudo-R² (McFadden) = 0.12
AIC = 57.62, BIC = 61.19 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
--------------------------- ------ ------ -------- ------
(Intercept)                   0.02   0.33     0.06   0.96
graph.Bilat_NAcc_mask         0.95   0.39     2.40   0.02
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~fund.Bilat_NAcc_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~fund.Bilat_NAcc_mask"
out_fund_NAcc2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_NAcc2,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(1) = 2.70, p = 0.10
Pseudo-R² (Cragg-Uhler) = 0.08
Pseudo-R² (McFadden) = 0.04
AIC = 62.30, BIC = 65.87 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
-------------------------- ------- ------ -------- ------
(Intercept)                  -0.00   0.31    -0.01   0.99
fund.Bilat_NAcc_mask          0.53   0.34     1.55   0.12
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~rela.Bilat_NAcc_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~rela.Bilat_NAcc_mask"
out_rela_NAcc2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_NAcc2,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(1) = 1.31, p = 0.25
Pseudo-R² (Cragg-Uhler) = 0.04
Pseudo-R² (McFadden) = 0.02
AIC = 63.69, BIC = 67.25 

Standard errors: MLE
--------------------------------------------------------
                             Est.   S.E.   z val.      p
-------------------------- ------ ------ -------- ------
(Intercept)                  0.00   0.31     0.01   0.99
rela.Bilat_NAcc_mask         0.36   0.33     1.10   0.27
--------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~news.Bilat_NAcc_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~news.Bilat_NAcc_mask"
out_news_NAcc2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_NAcc2,digits = getOption("jtools-digits", default = 2),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(1) = 0.17, p = 0.68
Pseudo-R² (Cragg-Uhler) = 0.01
Pseudo-R² (McFadden) = 0.00
AIC = 64.82, BIC = 68.39 

Standard errors: MLE
---------------------------------------------------------
                              Est.   S.E.   z val.      p
-------------------------- ------- ------ -------- ------
(Intercept)                  -0.00   0.30    -0.00   1.00
news.Bilat_NAcc_mask          0.13   0.31     0.42   0.68
---------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.

T-tests for NAcc activity (underperforming/overperforming)

t.test(prof.Bilat_NAcc_mask~Rel_p1Y_pos, data=reg_df,var.equal=TRUE)

    Two Sample t-test

data:  prof.Bilat_NAcc_mask by Rel_p1Y_pos
t = -2.9335, df = 42, p-value = 0.005409
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
 -0.12753448 -0.02357879
sample estimates:
mean in group 0 mean in group 1 
    -0.03770480      0.03785183 
t.test(graph.Bilat_NAcc_mask~Rel_p1Y_pos, data=reg_df,var.equal=TRUE)

    Two Sample t-test

data:  graph.Bilat_NAcc_mask by Rel_p1Y_pos
t = -2.7664, df = 42, p-value = 0.00839
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
 -0.12927029 -0.02021856
sample estimates:
mean in group 0 mean in group 1 
    -0.03707873      0.03766570 
t.test(fund.Bilat_NAcc_mask~Rel_p1Y_pos, data=reg_df,var.equal=TRUE)

    Two Sample t-test

data:  fund.Bilat_NAcc_mask by Rel_p1Y_pos
t = -1.6219, df = 42, p-value = 0.1123
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
 -0.086015229  0.009360935
sample estimates:
mean in group 0 mean in group 1 
    -0.01902793      0.01929922 
t.test(rela.Bilat_NAcc_mask~Rel_p1Y_pos, data=reg_df,var.equal=TRUE)

    Two Sample t-test

data:  rela.Bilat_NAcc_mask by Rel_p1Y_pos
t = -1.1214, df = 42, p-value = 0.2685
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
 -0.06665278  0.01903601
sample estimates:
mean in group 0 mean in group 1 
    -0.01178564      0.01202275 
t.test(news.Bilat_NAcc_mask~Rel_p1Y_pos, data=reg_df,var.equal=TRUE)

    Two Sample t-test

data:  news.Bilat_NAcc_mask by Rel_p1Y_pos
t = -0.40911, df = 42, p-value = 0.6845
alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
95 percent confidence interval:
 -0.05585200  0.03702392
sample estimates:
mean in group 0 mean in group 1 
   -0.004480793     0.004933247 

8. Create NAcc plot

Create data frame for NAcc plot

std <- function(x) sd(x)/sqrt(length(x))

x=Case_mean[c(19,9,4,24,14,35)]

AvgBeta_group = x %>% group_by(Performance = Rel_p1Y_pos) %>%
  summarise("Company Profile" = mean(prof.Bilat_NAcc_mask), 
            "Price Graph" = mean(graph.Bilat_NAcc_mask),
            "Fundamentals" = mean(fund.Bilat_NAcc_mask),
            "Relative Valuation" = mean(rela.Bilat_NAcc_mask),
            "News Item" = mean(news.Bilat_NAcc_mask))

SE_group = x %>% group_by(Performance = Rel_p1Y_pos) %>%
  summarise("Company Profile" = std(prof.Bilat_NAcc_mask), 
            "Price Graph" = std(graph.Bilat_NAcc_mask),
            "Fundamentals" = std(fund.Bilat_NAcc_mask),
            "Relative Valuation" = std(rela.Bilat_NAcc_mask),
            "News Item" = std(news.Bilat_NAcc_mask))

# Make Average dataframe
AvgBeta=AvgBeta_group
AvgBeta <- pivot_longer(AvgBeta, cols=-Performance,names_to='Screen',values_to='AvgBeta')

# Make SE dataframe
SE=SE_group
SE <- pivot_longer(SE, cols=-Performance,names_to='Screen',values_to='SE')

# Combine Average and SE
NAccplot_df <- AvgBeta %>% right_join(SE, by=c("Performance","Screen"))

#NAccplot_df

Create NAcc plot

NAccplot = NAccplot_df

NAccplot$Performance <- factor(NAccplot$Performance)
NAccplot$Screen <- factor(NAccplot$Screen, levels=unique(NAccplot$Screen), ordered=TRUE)


plot <- ggplot(NAccplot, aes(x=Screen, y=AvgBeta, fill=Performance)) +
    geom_bar(stat='identity', position = position_dodge2(padding = 0)) +
    geom_errorbar(aes(ymin=AvgBeta-SE, ymax=AvgBeta+SE), width=.2, position=position_dodge(0.9))+
    geom_hline(yintercept=0)+
    labs(x = "Information screen", fill="Future stock performance",y=expression(paste("Average ", beta, "-estimate (centered)")))+

    scale_fill_manual(labels = c("Underperforming", "Overperforming"), values = c("#8D8D8D", "#2070B4"))+

    annotate("text", x = 1, y = 0.06, label ="paste(italic(p), \" = 0.005\")",parse=TRUE,size=6)+ # p-value from t-test above
    annotate("text", x = 2, y = 0.06, label ="paste(italic(p), \" = 0.008\")",parse=TRUE,size=6)+ # p-value from t-test above
    theme_classic()+
    scale_y_continuous(expand = expansion(mult = c(0, 0)),limits = c(-0.07, 0.07),breaks=c(-0.06,-0.04,-0.02,0,0.02,0.04,0.06)) +
    theme(legend.position="bottom", legend.box = "horizontal",text = element_text(size = 28,family="Arial"),
          plot.title = element_text(hjust = 0.5))

 
#1. Open jpeg file
jpeg("NAcc_plot.jpg", width = 1200, height = 600)
#2. Create the plot
plot
#3. Close the file
dev.off()
null device 
          1 
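
Equivalently, the figure could be written with ggsave() from ggplot2 (optional alternative; the width, height, and dpi below are chosen to match the 1200 x 600 px target):

# ggsave("NAcc_plot.jpg", plot, width = 12, height = 6, dpi = 100)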

9. Regression analysis: Stock performance inflection

#Create stock performance inflection variable
reg_df$Inflection <- ifelse(reg_df$Rel_m3Y_pos==reg_df$Rel_p1Y_pos, 0, 1)
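
A quick look at the class balance of the new variable (optional check, output not shown):

table(reg_df$Inflection)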


formula=paste("Inflection~log_CMC+prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask",sep="")
print(formula)
[1] "Inflection~log_CMC+prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask"
out_prof_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_inflection,digits = getOption("jtools-digits", default = 3)))
MODEL INFO:
Observations: 44
Dependent Variable: Inflection
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(4) = 6.035, p = 0.197
Pseudo-R² (Cragg-Uhler) = 0.171
Pseudo-R² (McFadden) = 0.099
AIC = 64.962, BIC = 73.883 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                   1.352   2.142    0.631   0.528
log_CMC                      -0.159   0.248   -0.639   0.523
prof.Bilat_NAcc_mask          6.480   5.684    1.140   0.254
prof.Bilat_MPFC_mask         -9.869   4.736   -2.084   0.037
prof.Bilat_AIns_mask         11.286   7.334    1.539   0.124
------------------------------------------------------------
print("No relationship between MPFC activity at profile screen and inflection:")
[1] "No relationship between MPFC activity at profile screen and inflection:"
formula=paste("Inflection~prof.Bilat_MPFC_mask",sep="")
print(formula)
[1] "Inflection~prof.Bilat_MPFC_mask"
out_prof_mpfc_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_mpfc_inflection,digits = getOption("jtools-digits", default = 3)))
MODEL INFO:
Observations: 44
Dependent Variable: Inflection
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(1) = 0.178, p = 0.673
Pseudo-R² (Cragg-Uhler) = 0.005
Pseudo-R² (McFadden) = 0.003
AIC = 64.819, BIC = 68.387 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                   0.000   0.302    0.000   1.000
prof.Bilat_MPFC_mask         -1.002   2.377   -0.421   0.673
------------------------------------------------------------
formula=paste("Inflection~graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask",sep="")
print(formula)
[1] "Inflection~graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask"
out_graph_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_inflection,digits = getOption("jtools-digits", default = 3)))
MODEL INFO:
Observations: 44
Dependent Variable: Inflection
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(3) = 1.274, p = 0.735
Pseudo-R² (Cragg-Uhler) = 0.038
Pseudo-R² (McFadden) = 0.021
AIC = 67.723, BIC = 74.860 

Standard errors: MLE
-------------------------------------------------------------
                                Est.    S.E.   z val.       p
--------------------------- -------- ------- -------- -------
(Intercept)                    0.001   0.306    0.002   0.999
graph.Bilat_NAcc_mask          2.497   5.860    0.426   0.670
graph.Bilat_MPFC_mask          2.701   3.508    0.770   0.441
graph.Bilat_AIns_mask         -3.421   7.584   -0.451   0.652
-------------------------------------------------------------
formula=paste("Inflection~fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask",sep="")
print(formula)
[1] "Inflection~fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask"
out_fund_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_inflection,digits = getOption("jtools-digits", default = 3)))
MODEL INFO:
Observations: 44
Dependent Variable: Inflection
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(3) = 2.863, p = 0.413
Pseudo-R² (Cragg-Uhler) = 0.084
Pseudo-R² (McFadden) = 0.047
AIC = 66.134, BIC = 73.270 

Standard errors: MLE
-------------------------------------------------------------
                               Est.     S.E.   z val.       p
-------------------------- -------- -------- -------- -------
(Intercept)                   0.001    0.312    0.003   0.998
fund.Bilat_NAcc_mask         -5.686    7.394   -0.769   0.442
fund.Bilat_MPFC_mask         -3.191    3.844   -0.830   0.407
fund.Bilat_AIns_mask         16.939   11.142    1.520   0.128
-------------------------------------------------------------
formula=paste("Inflection~rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask",sep="")
print(formula)
[1] "Inflection~rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask"
out_rela_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_inflection,digits = getOption("jtools-digits", default = 3)))
MODEL INFO:
Observations: 44
Dependent Variable: Inflection
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(3) = 1.350, p = 0.717
Pseudo-R² (Cragg-Uhler) = 0.040
Pseudo-R² (McFadden) = 0.022
AIC = 67.647, BIC = 74.783 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                   0.002   0.306    0.006   0.995
rela.Bilat_NAcc_mask          5.972   7.528    0.793   0.428
rela.Bilat_MPFC_mask         -3.913   4.005   -0.977   0.329
rela.Bilat_AIns_mask          1.866   8.161    0.229   0.819
------------------------------------------------------------
formula=paste("Inflection~news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask",sep="")
print(formula)
[1] "Inflection~news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask"
out_news_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_inflection,digits = getOption("jtools-digits", default = 3)))
MODEL INFO:
Observations: 44
Dependent Variable: Inflection
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(3) = 4.423, p = 0.219
Pseudo-R² (Cragg-Uhler) = 0.128
Pseudo-R² (McFadden) = 0.073
AIC = 64.574, BIC = 71.711 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                  -0.003   0.317   -0.008   0.994
news.Bilat_NAcc_mask         10.310   6.769    1.523   0.128
news.Bilat_MPFC_mask         -8.348   4.549   -1.835   0.066
news.Bilat_AIns_mask          2.711   6.771    0.400   0.689
------------------------------------------------------------
print("No relationship between MPFC activity at news screen and inflection:")
[1] "No relationship between MPFC activity at news screen and inflection:"
formula=paste("Inflection~news.Bilat_MPFC_mask",sep="")
print(formula)
[1] "Inflection~news.Bilat_MPFC_mask"
out_news_mpfc_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_mpfc_inflection,digits = getOption("jtools-digits", default = 3)))
MODEL INFO:
Observations: 44
Dependent Variable: Inflection
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(1) = 0.615, p = 0.433
Pseudo-R² (Cragg-Uhler) = 0.019
Pseudo-R² (McFadden) = 0.010
AIC = 64.382, BIC = 67.950 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                   0.001   0.304    0.004   0.997
news.Bilat_MPFC_mask         -2.221   2.861   -0.776   0.438
------------------------------------------------------------

10. Regression models: NAcc factor

formula=paste("Rel_p1Y_pos~Factor1",sep="")
print(formula)
[1] "Rel_p1Y_pos~Factor1"
out_factor1 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_factor1,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(1) = 7.100, p = 0.008
Pseudo-R² (Cragg-Uhler) = 0.199
Pseudo-R² (McFadden) = 0.116
AIC = 57.897, BIC = 61.465 

Standard errors: MLE
--------------------------------------------------
                     Est.    S.E.   z val.       p
----------------- ------- ------- -------- -------
(Intercept)         0.010   0.327    0.030   0.976
Factor1             0.913   0.382    2.388   0.017
--------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~Factor2",sep="")
print(formula)
[1] "Rel_p1Y_pos~Factor2"
out_factor2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_factor2,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(1) = 0.113, p = 0.737
Pseudo-R² (Cragg-Uhler) = 0.003
Pseudo-R² (McFadden) = 0.002
AIC = 64.884, BIC = 68.452 

Standard errors: MLE
---------------------------------------------------
                      Est.    S.E.   z val.       p
----------------- -------- ------- -------- -------
(Intercept)         -0.000   0.302   -0.000   1.000
Factor2             -0.103   0.307   -0.335   0.737
---------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.

11. Regression models: Market, Market + Behavior, Market + Behavior + Brain

formula=paste("Rel_p1Y_pos~log_CMC+Rel_m3Y_pos+f_s+f_ebit+f_ebitmargin+f_roe+f_roic+rv_evs+rv_ebitda+rv_pe+rv_pb+news_incgr",sep="")
print(formula)
[1] "Rel_p1Y_pos~log_CMC+Rel_m3Y_pos+f_s+f_ebit+f_ebitmargin+f_roe+f_roic+rv_evs+rv_ebitda+rv_pe+rv_pb+news_incgr"
out_market <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_market,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(12) = 13.099, p = 0.362
Pseudo-R² (Cragg-Uhler) = 0.343
Pseudo-R² (McFadden) = 0.215
AIC = 73.898, BIC = 97.093 

Standard errors: MLE
----------------------------------------------------
                       Est.    S.E.   z val.       p
------------------ -------- ------- -------- -------
(Intercept)           0.309   0.813    0.380   0.704
log_CMC               0.751   0.766    0.981   0.327
Rel_m3Y_pos           0.194   0.847    0.229   0.819
f_s                  -0.360   0.485   -0.743   0.457
f_ebit               -0.294   0.577   -0.510   0.610
f_ebitmargin         -0.147   0.731   -0.201   0.841
f_roe                -0.408   0.940   -0.434   0.664
f_roic               -1.005   0.833   -1.207   0.228
rv_evs                0.004   0.887    0.004   0.997
rv_ebitda             1.989   1.210    1.644   0.100
rv_pe                -1.225   0.775   -1.581   0.114
rv_pb                 1.252   3.684    0.340   0.734
news_incgr            1.267   1.520    0.834   0.404
----------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~log_CMC+Rel_m3Y_pos+f_s+f_ebit+f_ebitmargin+f_roe+f_roic+rv_evs+rv_ebitda+rv_pe+rv_pb+news_incgr+choice",sep="")
print(formula)
[1] "Rel_p1Y_pos~log_CMC+Rel_m3Y_pos+f_s+f_ebit+f_ebitmargin+f_roe+f_roic+rv_evs+rv_ebitda+rv_pe+rv_pb+news_incgr+choice"
out_market_choice <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_market_choice,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(13) = 14.440, p = 0.344
Pseudo-R² (Cragg-Uhler) = 0.373
Pseudo-R² (McFadden) = 0.237
AIC = 74.557, BIC = 99.535 

Standard errors: MLE
----------------------------------------------------
                       Est.    S.E.   z val.       p
------------------ -------- ------- -------- -------
(Intercept)           0.285   0.789    0.362   0.718
log_CMC               0.864   0.763    1.132   0.258
Rel_m3Y_pos          -0.068   0.896   -0.076   0.939
f_s                  -0.204   0.498   -0.410   0.682
f_ebit               -0.577   0.616   -0.938   0.348
f_ebitmargin         -0.035   0.796   -0.044   0.965
f_roe                -0.370   0.961   -0.386   0.700
f_roic               -1.298   0.971   -1.336   0.182
rv_evs                0.047   0.893    0.053   0.958
rv_ebitda             1.980   1.243    1.593   0.111
rv_pe                -1.256   0.724   -1.735   0.083
rv_pb                 1.032   3.297    0.313   0.754
news_incgr            0.547   1.568    0.349   0.727
choice                0.565   0.500    1.131   0.258
----------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~log_CMC+Rel_m3Y_pos+f_s+f_ebit+f_ebitmargin+f_roe+f_roic+rv_evs+rv_ebitda+rv_pe+rv_pb+news_incgr+choice+Factor1",sep="")
print(formula)
[1] "Rel_p1Y_pos~log_CMC+Rel_m3Y_pos+f_s+f_ebit+f_ebitmargin+f_roe+f_roic+rv_evs+rv_ebitda+rv_pe+rv_pb+news_incgr+choice+Factor1"
out_all <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_all,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(14) = 25.334, p = 0.031
Pseudo-R² (Cragg-Uhler) = 0.584
Pseudo-R² (McFadden) = 0.415
AIC = 65.663, BIC = 92.426 

Standard errors: MLE
----------------------------------------------------
                       Est.    S.E.   z val.       p
------------------ -------- ------- -------- -------
(Intercept)           0.165   0.811    0.204   0.839
log_CMC               1.195   1.082    1.105   0.269
Rel_m3Y_pos           0.357   1.083    0.330   0.742
f_s                   0.934   0.741    1.261   0.207
f_ebit               -1.374   0.920   -1.494   0.135
f_ebitmargin          0.388   1.107    0.350   0.726
f_roe                -1.149   1.577   -0.729   0.466
f_roic               -1.037   1.122   -0.924   0.356
rv_evs                0.986   1.010    0.976   0.329
rv_ebitda             2.539   1.495    1.699   0.089
rv_pe                -1.844   0.911   -2.024   0.043
rv_pb                 0.199   1.176    0.169   0.866
news_incgr            2.326   1.866    1.247   0.212
choice                1.299   0.764    1.699   0.089
Factor1               1.879   0.741    2.534   0.011
----------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.

Create Table 3:

coef_names <- c("Current Market Capitalization" = "log_CMC",
                "36M Performance" = "Rel_m3Y_pos",
                "Sales" = "f_s", "EBIT" = "f_ebit", "EBIT Margin (%)" = "f_ebitmargin", "ROE (%)" = "f_roe", "ROIC (%)" = "f_roic",
                "EV/Sales" = "rv_evs", "EV/EBITDA" = "rv_ebitda", "P/E" = "rv_pe", "P/B" = "rv_pb",
                "Income growth" = "news_incgr", "Prediction" = "choice", "NAcc - Profile, Graph" = "Factor1")
# Uncomment to create table
#quick_docx(export_summs(out_market,out_market_choice,out_all, scale = TRUE,coefs = coef_names,error_pos = c("same"),
#             model.names=c("Market","Market + Behavior","Market + Behavior + Brain"),
#             statistics= c(N = "nobs", AIC = "AIC", "Pseudo R2 (McFadden)" = "pseudo.r.squared.mcfadden")),file='Table-3.docx')

Control analysis with the single stock metric rv_pe (P/E):

formula=paste("Rel_p1Y_pos~rv_pe",sep="")
print(formula)
[1] "Rel_p1Y_pos~rv_pe"
out_rvpe <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rvpe,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(1) = 0.029, p = 0.866
Pseudo-R² (Cragg-Uhler) = 0.001
Pseudo-R² (McFadden) = 0.000
AIC = 64.968, BIC = 68.537 

Standard errors: MLE
---------------------------------------------------
                      Est.    S.E.   z val.       p
----------------- -------- ------- -------- -------
(Intercept)         -0.000   0.302   -0.000   1.000
rv_pe               -0.052   0.306   -0.169   0.866
---------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.

Model comparison:

anova(out_market, out_all, test = "LRT")
Analysis of Deviance Table

Model 1: Rel_p1Y_pos ~ log_CMC + Rel_m3Y_pos + f_s + f_ebit + f_ebitmargin + 
    f_roe + f_roic + rv_evs + rv_ebitda + rv_pe + rv_pb + news_incgr
Model 2: Rel_p1Y_pos ~ log_CMC + Rel_m3Y_pos + f_s + f_ebit + f_ebitmargin + 
    f_roe + f_roic + rv_evs + rv_ebitda + rv_pe + rv_pb + news_incgr + 
    choice + Factor1
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)   
1        31     47.898                        
2        29     35.663  2   12.236 0.002203 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
anova(out_market_choice, out_all, test = "LRT")
Analysis of Deviance Table

Model 1: Rel_p1Y_pos ~ log_CMC + Rel_m3Y_pos + f_s + f_ebit + f_ebitmargin + 
    f_roe + f_roic + rv_evs + rv_ebitda + rv_pe + rv_pb + news_incgr + 
    choice
Model 2: Rel_p1Y_pos ~ log_CMC + Rel_m3Y_pos + f_s + f_ebit + f_ebitmargin + 
    f_roe + f_roic + rv_evs + rv_ebitda + rv_pe + rv_pb + news_incgr + 
    choice + Factor1
  Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
1        30     46.557                          
2        29     35.663  1   10.894 0.0009647 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
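
The likelihood-ratio p-values above follow directly from the drop in residual deviance between the nested models; a quick check using the deviances reported in the tables:

pchisq(47.898 - 35.663, df = 2, lower.tail = FALSE)  # Market vs. Market + Behavior + Brain, p ≈ 0.0022
pchisq(46.557 - 35.663, df = 1, lower.tail = FALSE)  # Market + Behavior vs. Market + Behavior + Brain, p ≈ 0.00096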

11. Prediction analysis

Set input for prediction analysis:

crossval_df = reg_df[, c("Rel_p1Y_pos",
                         "choice",
                         "log_CMC","Rel_m3Y_pos","f_s","f_ebit","f_ebitmargin","f_roe","f_roic","rv_evs","rv_ebitda","rv_pe","rv_pb","news_incgr",
                         "Factor1")]

# Convert the outcome and the binary 36M-performance predictor to factors (a factor outcome makes caret treat this as classification)
crossval_df$Rel_m3Y_pos <- as.factor(crossval_df$Rel_m3Y_pos)
crossval_df$Rel_p1Y_pos <- as.factor(crossval_df$Rel_p1Y_pos)

crossval_df
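
Note that the outcome is balanced (22 underperforming and 22 overperforming cases), which is why the No Information Rate in the confusion matrices below equals 0.5. A quick check:

table(crossval_df$Rel_p1Y_pos)  # expected: 22 cases of class 0 and 22 of class 1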

K-fold cross-validation with the caret package:

K-fold: Brain (NAcc)

braindat = crossval_df[,c("Rel_p1Y_pos","Factor1")]  # Brain: NAcc factor score only
choicedat = crossval_df[,c("Rel_p1Y_pos","choice")]  # Behavior: the investor's prediction only
marketdat = crossval_df[,-c(2,15)]                   # Market: all stock metrics (drop choice and Factor1)

set.seed(0)

kfold = function(data, nfolds){
  # Create stratified folds on the outcome and use them as training indices
  folds <- createFolds(factor(data$Rel_p1Y_pos), nfolds, returnTrain = T)

  train_model <- trainControl(method="cv", number=nfolds, index = folds, savePredictions = TRUE)

  # Model
  model <- train(
    Rel_p1Y_pos ~ .,
    data = data,
    method = "glm", # logistic regression
    family = "binomial",
    trControl = train_model)

  # Confusion matrix over the held-out predictions pooled across folds
  return(confusionMatrix(model$pred$pred, model$pred$obs, positive="1"))
}

print(kfold(braindat,5))
Confusion Matrix and Statistics

          Reference
Prediction  0  1
         0 16  8
         1  6 14
                                          
               Accuracy : 0.6818          
                 95% CI : (0.5242, 0.8139)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : 0.01131         
                                          
                  Kappa : 0.3636          
                                          
 Mcnemar's Test P-Value : 0.78927         
                                          
            Sensitivity : 0.6364          
            Specificity : 0.7273          
         Pos Pred Value : 0.7000          
         Neg Pred Value : 0.6667          
             Prevalence : 0.5000          
         Detection Rate : 0.3182          
   Detection Prevalence : 0.4545          
      Balanced Accuracy : 0.6818          
                                          
       'Positive' Class : 1               
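
For reference, the statistics above follow from this pooled out-of-fold confusion matrix; a minimal check using the counts shown:

cm <- matrix(c(16, 6, 8, 14), nrow = 2,
             dimnames = list(Prediction = c("0", "1"), Reference = c("0", "1")))
sum(diag(cm)) / sum(cm)              # accuracy: (16 + 14) / 44 ≈ 0.682
(14 / (8 + 14) + 16 / (6 + 16)) / 2  # balanced accuracy: mean of sensitivity and specificity ≈ 0.682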
                                          

K-fold: Behavior (choice)

print(kfold(choicedat,5))
Confusion Matrix and Statistics

          Reference
Prediction  0  1
         0 10 13
         1 12  9
                                          
               Accuracy : 0.4318          
                 95% CI : (0.2835, 0.5897)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : 0.8544          
                                          
                  Kappa : -0.1364         
                                          
 Mcnemar's Test P-Value : 1.0000          
                                          
            Sensitivity : 0.4091          
            Specificity : 0.4545          
         Pos Pred Value : 0.4286          
         Neg Pred Value : 0.4348          
             Prevalence : 0.5000          
         Detection Rate : 0.2045          
   Detection Prevalence : 0.4773          
      Balanced Accuracy : 0.4318          
                                          
       'Positive' Class : 1               
                                          

K-fold: Market (Stock metrics)

print(kfold(marketdat,5))
Confusion Matrix and Statistics

          Reference
Prediction  0  1
         0  8 11
         1 14 11
                                          
               Accuracy : 0.4318          
                 95% CI : (0.2835, 0.5897)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : 0.8544          
                                          
                  Kappa : -0.1364         
                                          
 Mcnemar's Test P-Value : 0.6892          
                                          
            Sensitivity : 0.5000          
            Specificity : 0.3636          
         Pos Pred Value : 0.4400          
         Neg Pred Value : 0.4211          
             Prevalence : 0.5000          
         Detection Rate : 0.2500          
   Detection Prevalence : 0.5682          
      Balanced Accuracy : 0.4318          
                                          
       'Positive' Class : 1               
                                          

Leave-one-out cross-validation analyses:

Leave-one-out: Brain

set.seed(0)
# LOOCV
train_model<- trainControl(method="loocv", savePredictions = TRUE)

# Model
model <- train(
    Rel_p1Y_pos ~ ., 
    data = braindat, 
    method = "glm", # logistic regression
    family = "binomial",
    trControl = train_model)

model #to print model result
Generalized Linear Model 

44 samples
 1 predictor
 2 classes: '0', '1' 

No pre-processing
Resampling: Leave-One-Out Cross-Validation 
Summary of sample sizes: 43, 43, 43, 43, 43, 43, ... 
Resampling results:

  Accuracy   Kappa
  0.6363636  0    
confusionMatrix(model$pred$pred,model$pred$obs,positive="1")
Confusion Matrix and Statistics

          Reference
Prediction  0  1
         0 15  9
         1  7 13
                                          
               Accuracy : 0.6364          
                 95% CI : (0.4777, 0.7759)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : 0.04807         
                                          
                  Kappa : 0.2727          
                                          
 Mcnemar's Test P-Value : 0.80259         
                                          
            Sensitivity : 0.5909          
            Specificity : 0.6818          
         Pos Pred Value : 0.6500          
         Neg Pred Value : 0.6250          
             Prevalence : 0.5000          
         Detection Rate : 0.2955          
   Detection Prevalence : 0.4545          
      Balanced Accuracy : 0.6364          
                                          
       'Positive' Class : 1               
                                          

Leave-one-out: Behavior (choice)

set.seed(0)
# LOOCV
train_model<- trainControl(method="loocv", savePredictions = TRUE)

# Model
model <- train(
    Rel_p1Y_pos ~ ., 
    data = choicedat, 
    method = "glm", # logistic regression
    family = "binomial",
    trControl = train_model)

model #to print model result
Generalized Linear Model 

44 samples
 1 predictor
 2 classes: '0', '1' 

No pre-processing
Resampling: Leave-One-Out Cross-Validation 
Summary of sample sizes: 43, 43, 43, 43, 43, 43, ... 
Resampling results:

  Accuracy  Kappa
  0.5       0    
confusionMatrix(model$pred$pred,model$pred$obs,positive="1")
Confusion Matrix and Statistics

          Reference
Prediction  0  1
         0 10 10
         1 12 12
                                          
               Accuracy : 0.5             
                 95% CI : (0.3456, 0.6544)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : 0.5598          
                                          
                  Kappa : 0               
                                          
 Mcnemar's Test P-Value : 0.8312          
                                          
            Sensitivity : 0.5455          
            Specificity : 0.4545          
         Pos Pred Value : 0.5000          
         Neg Pred Value : 0.5000          
             Prevalence : 0.5000          
         Detection Rate : 0.2727          
   Detection Prevalence : 0.5455          
      Balanced Accuracy : 0.5000          
                                          
       'Positive' Class : 1               
                                          

Leave-one-out: Market (Stock metrics)

set.seed(0)
# LOOCV
train_model<- trainControl(method="loocv", savePredictions = TRUE)

# Model
model <- train(
    Rel_p1Y_pos ~ ., 
    data = marketdat, 
    method = "glm", # logistic regression
    family = "binomial",
    trControl = train_model)

model #to print model result
Generalized Linear Model 

44 samples
12 predictors
 2 classes: '0', '1' 

No pre-processing
Resampling: Leave-One-Out Cross-Validation 
Summary of sample sizes: 43, 43, 43, 43, 43, 43, ... 
Resampling results:

  Accuracy   Kappa
  0.4318182  0    
confusionMatrix(model$pred$pred,model$pred$obs,positive="1")
Confusion Matrix and Statistics

          Reference
Prediction  0  1
         0 10 13
         1 12  9
                                          
               Accuracy : 0.4318          
                 95% CI : (0.2835, 0.5897)
    No Information Rate : 0.5             
    P-Value [Acc > NIR] : 0.8544          
                                          
                  Kappa : -0.1364         
                                          
 Mcnemar's Test P-Value : 1.0000          
                                          
            Sensitivity : 0.4091          
            Specificity : 0.4545          
         Pos Pred Value : 0.4286          
         Neg Pred Value : 0.4348          
             Prevalence : 0.5000          
         Detection Rate : 0.2045          
   Detection Prevalence : 0.4773          
      Balanced Accuracy : 0.4318          
                                          
       'Positive' Class : 1               
                                          

S. Supplementary material: Appendix 4

Table S1: Logistic regression model for control regions

formula=paste("Rel_p1Y_pos~prof.Left_PIns_mask+prof.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~prof.Left_PIns_mask+prof.Left_PVis_mask"
out_prof_control <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_control,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(2) = 0.037, p = 0.982
Pseudo-R² (Cragg-Uhler) = 0.001
Pseudo-R² (McFadden) = 0.001
AIC = 66.960, BIC = 72.313 

Standard errors: MLE
-----------------------------------------------------------
                              Est.    S.E.   z val.       p
------------------------- -------- ------- -------- -------
(Intercept)                 -0.000   0.302   -0.000   1.000
prof.Left_PIns_mask         -0.071   0.375   -0.191   0.849
prof.Left_PVis_mask          0.036   0.375    0.097   0.923
-----------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~graph.Left_PIns_mask+graph.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~graph.Left_PIns_mask+graph.Left_PVis_mask"
out_graph_control <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_control,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(2) = 2.846, p = 0.241
Pseudo-R² (Cragg-Uhler) = 0.084
Pseudo-R² (McFadden) = 0.047
AIC = 64.151, BIC = 69.504 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                   0.001   0.311    0.002   0.999
graph.Left_PIns_mask         -0.159   0.412   -0.387   0.699
graph.Left_PVis_mask          0.628   0.429    1.461   0.144
------------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~fund.Left_PIns_mask+fund.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~fund.Left_PIns_mask+fund.Left_PVis_mask"
out_fund_control <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_control,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(2) = 4.368, p = 0.113
Pseudo-R² (Cragg-Uhler) = 0.126
Pseudo-R² (McFadden) = 0.072
AIC = 62.629, BIC = 67.982 

Standard errors: MLE
-----------------------------------------------------------
                              Est.    S.E.   z val.       p
------------------------- -------- ------- -------- -------
(Intercept)                  0.003   0.317    0.011   0.991
fund.Left_PIns_mask         -0.728   0.423   -1.722   0.085
fund.Left_PVis_mask          0.816   0.450    1.813   0.070
-----------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~rela.Left_PIns_mask+rela.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~rela.Left_PIns_mask+rela.Left_PVis_mask"
out_rela_control <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_control,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(2) = 0.533, p = 0.766
Pseudo-R² (Cragg-Uhler) = 0.016
Pseudo-R² (McFadden) = 0.009
AIC = 66.464, BIC = 71.817 

Standard errors: MLE
-----------------------------------------------------------
                              Est.    S.E.   z val.       p
------------------------- -------- ------- -------- -------
(Intercept)                 -0.000   0.303   -0.001   0.999
rela.Left_PIns_mask         -0.104   0.402   -0.259   0.795
rela.Left_PVis_mask          0.278   0.409    0.680   0.497
-----------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~news.Left_PIns_mask+news.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~news.Left_PIns_mask+news.Left_PVis_mask"
out_news_control <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_control,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(2) = 0.085, p = 0.958
Pseudo-R² (Cragg-Uhler) = 0.003
Pseudo-R² (McFadden) = 0.001
AIC = 66.912, BIC = 72.264 

Standard errors: MLE
----------------------------------------------------------
                             Est.    S.E.   z val.       p
------------------------- ------- ------- -------- -------
(Intercept)                 0.000   0.302    0.000   1.000
news.Left_PIns_mask         0.019   0.339    0.055   0.956
news.Left_PVis_mask         0.079   0.340    0.234   0.815
----------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
coef_names <- c("Profile - PVis" = "prof.Left_PVis_mask","Profile - PIns" = "prof.Left_PIns_mask",
                "Graph - PVis" = "graph.Left_PVis_mask","Graph - PIns" = "graph.Left_PIns_mask",
                "Fundamentals - PVis" = "fund.Left_PVis_mask","Fundamentals - PIns" = "fund.Left_PIns_mask",
                "Relative Valuation - PVis" = "rela.Left_PVis_mask","Relative Valuation - PIns" = "rela.Left_PIns_mask",
                "News Item - PVis" = "news.Left_PVis_mask","News Item - PIns" = "news.Left_PIns_mask")

# Uncomment to create table
#quick_docx(export_summs(out_prof_control,out_graph_control,out_fund_control,out_rela_control,out_news_control, scale = TRUE,coefs = coef_names,error_pos = c("same"),
#             model.names=c("Company Profile","Price Graph","Fundamentals","Relative Valuation","News Item"),
#             statistics= c(N = "nobs", AIC = "AIC", "Pseudo R2 (McFadden)" = "pseudo.r.squared.mcfadden")), file='Table-S1.docx')

Table S2: Correlation matrix:

# For each information screen, select its five VOI columns (by position in reg_df) and strip the screen prefix from the names
prof_data <- reg_df[, c(18,19,20,21,22)]
names(prof_data) <- substring(names(prof_data), 6)

graph_data <- reg_df[, c(8,9,10,11,12)]
names(graph_data) <- substring(names(graph_data), 7)

fund_data <- reg_df[, c(3,4,5,6,7)]
names(fund_data) <- substring(names(fund_data), 6)

rela_data <- reg_df[, c(23,24,25,26,27)]
names(rela_data) <- substring(names(rela_data), 6)

news_data <- reg_df[, c(13,14,15,16,17)]
names(news_data) <- substring(names(news_data), 6)


res_prof <- cor(prof_data)

res_graph <-cor(graph_data)

res_fund <-cor(fund_data)

res_rela <-cor(rela_data)

res_news <-cor(news_data)

cor_matrices <-list(res_prof,res_graph,res_fund,res_rela,res_news)

Reduce("+", cor_matrices) / length(cor_matrices)
                Bilat_MPFC_mask Bilat_AIns_mask Bilat_NAcc_mask Left_PVis_mask Left_PIns_mask
Bilat_MPFC_mask       1.0000000       0.7104264       0.6892672      0.4125912      0.4929137
Bilat_AIns_mask       0.7104264       1.0000000       0.7615776      0.4339454      0.4942590
Bilat_NAcc_mask       0.6892672       0.7615776       1.0000000      0.5142567      0.5206732
Left_PVis_mask        0.4125912       0.4339454       0.5142567      1.0000000      0.5825221
Left_PIns_mask        0.4929137       0.4942590       0.5206732      0.5825221      1.0000000

Table S3: VOIs and Control regions

formula=paste("Rel_p1Y_pos~prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask+prof.Left_PIns_mask+prof.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask+prof.Left_PIns_mask+prof.Left_PVis_mask"
out_prof_control2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_control2,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(5) = 12.224, p = 0.032
Pseudo-R² (Cragg-Uhler) = 0.323
Pseudo-R² (McFadden) = 0.200
AIC = 60.773, BIC = 71.478 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                   0.028   0.352    0.079   0.937
prof.Bilat_NAcc_mask          1.634   0.683    2.393   0.017
prof.Bilat_MPFC_mask          0.186   0.587    0.317   0.751
prof.Bilat_AIns_mask         -0.583   0.576   -1.013   0.311
prof.Left_PIns_mask           0.007   0.452    0.015   0.988
prof.Left_PVis_mask          -0.634   0.478   -1.327   0.185
------------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask+graph.Left_PIns_mask+graph.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask+graph.Left_PIns_mask+graph.Left_PVis_mask"
out_graph_control2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_control2,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(5) = 11.423, p = 0.044
Pseudo-R² (Cragg-Uhler) = 0.305
Pseudo-R² (McFadden) = 0.187
AIC = 61.573, BIC = 72.279 

Standard errors: MLE
-------------------------------------------------------------
                                Est.    S.E.   z val.       p
--------------------------- -------- ------- -------- -------
(Intercept)                   -0.011   0.347   -0.033   0.974
graph.Bilat_NAcc_mask          1.784   0.808    2.208   0.027
graph.Bilat_MPFC_mask         -0.835   0.589   -1.418   0.156
graph.Bilat_AIns_mask         -0.222   0.730   -0.304   0.761
graph.Left_PIns_mask          -0.249   0.467   -0.535   0.593
graph.Left_PVis_mask           0.258   0.522    0.494   0.621
-------------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask+fund.Left_PIns_mask+fund.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask+fund.Left_PIns_mask+fund.Left_PVis_mask"
out_fund_control2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_control2,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(5) = 12.780, p = 0.026
Pseudo-R² (Cragg-Uhler) = 0.336
Pseudo-R² (McFadden) = 0.210
AIC = 60.217, BIC = 70.922 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                  -0.043   0.353   -0.123   0.902
fund.Bilat_NAcc_mask          2.114   0.880    2.403   0.016
fund.Bilat_MPFC_mask          0.401   0.560    0.716   0.474
fund.Bilat_AIns_mask         -1.521   0.849   -1.792   0.073
fund.Left_PIns_mask          -1.333   0.554   -2.406   0.016
fund.Left_PVis_mask           0.689   0.492    1.399   0.162
------------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask+rela.Left_PIns_mask+rela.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask+rela.Left_PIns_mask+rela.Left_PVis_mask"
out_rela_control2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_control2,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(5) = 5.191, p = 0.393
Pseudo-R² (Cragg-Uhler) = 0.148
Pseudo-R² (McFadden) = 0.085
AIC = 67.806, BIC = 78.511 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                  -0.007   0.320   -0.023   0.982
rela.Bilat_NAcc_mask          0.949   0.603    1.575   0.115
rela.Bilat_MPFC_mask          0.648   0.566    1.144   0.253
rela.Bilat_AIns_mask         -0.888   0.574   -1.545   0.122
rela.Left_PIns_mask          -0.625   0.565   -1.106   0.269
rela.Left_PVis_mask           0.172   0.427    0.404   0.687
------------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask+news.Left_PIns_mask+news.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask+news.Left_PIns_mask+news.Left_PVis_mask"
out_news_control2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_control2,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(5) = 3.080, p = 0.688
Pseudo-R² (Cragg-Uhler) = 0.090
Pseudo-R² (McFadden) = 0.050
AIC = 69.917, BIC = 80.622 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                  -0.003   0.312   -0.008   0.994
news.Bilat_NAcc_mask         -0.345   0.505   -0.683   0.495
news.Bilat_MPFC_mask          0.415   0.498    0.835   0.404
news.Bilat_AIns_mask          0.573   0.472    1.214   0.225
news.Left_PIns_mask          -0.310   0.437   -0.710   0.478
news.Left_PVis_mask           0.018   0.368    0.049   0.961
------------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
coef_names <- c("Profile - PVis" = "prof.Left_PVis_mask","Profile - PIns" = "prof.Left_PIns_mask","Profile - NAcc" = "prof.Bilat_NAcc_mask", "Profile - MPFC" = "prof.Bilat_MPFC_mask", "Profile - AIns" = "prof.Bilat_AIns_mask",
                "Graph - PVis" = "graph.Left_PVis_mask","Graph - PIns" = "graph.Left_PIns_mask","Graph - NAcc" = "graph.Bilat_NAcc_mask", "Graph - MPFC" = "graph.Bilat_MPFC_mask", "Graph - AIns" = "graph.Bilat_AIns_mask",
                "Fundamentals - PVis" = "fund.Left_PVis_mask","Fundamentals - PIns" = "fund.Left_PIns_mask","Fundamentals - NAcc" = "fund.Bilat_NAcc_mask", "Fundamentals - MPFC" = "fund.Bilat_MPFC_mask", "Fundamentals - AIns" = "fund.Bilat_AIns_mask",
                "Relative Valuation - PVis" = "rela.Left_PVis_mask","Relative Valuation - PIns" = "rela.Left_PIns_mask","Relative Valuation - NAcc" = "rela.Bilat_NAcc_mask", "Relative Valuation - MPFC" = "rela.Bilat_MPFC_mask", "Relative Valuation - AIns" = "rela.Bilat_AIns_mask",
                "News item - PVis" = "news.Left_PVis_mask","News item - PIns" = "news.Left_PIns_mask","News item - NAcc" = "news.Bilat_NAcc_mask", "News item - MPFC" = "news.Bilat_MPFC_mask", "News item - AIns" = "news.Bilat_AIns_mask" )

# Uncomment to create table
#quick_docx(export_summs(out_prof_control2,out_graph_control2,out_fund_control2,out_rela_control2,out_news_control2, scale = TRUE,coefs = coef_names,error_pos = c("same"),
#             model.names=c("Company Profile","Price Graph","Fundamentals","Relative Valuation","News Item"),
#             statistics= c(N = "nobs", AIC = "AIC", "Pseudo R2 (McFadden)" = "pseudo.r.squared.mcfadden")), file='Table-S3.docx')

Table S4: VOIs, Control regions and stock metrics:

formula=paste("Rel_p1Y_pos~log_CMC+prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask+prof.Left_PIns_mask+prof.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~log_CMC+prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask+prof.Left_PIns_mask+prof.Left_PVis_mask"
out_prof_control3 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_control3,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(6) = 12.484, p = 0.052
Pseudo-R² (Cragg-Uhler) = 0.329
Pseudo-R² (McFadden) = 0.205
AIC = 62.513, BIC = 75.002 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                   0.026   0.353    0.074   0.941
log_CMC                       0.190   0.372    0.510   0.610
prof.Bilat_NAcc_mask          1.601   0.686    2.334   0.020
prof.Bilat_MPFC_mask          0.265   0.611    0.433   0.665
prof.Bilat_AIns_mask         -0.630   0.588   -1.071   0.284
prof.Left_PIns_mask          -0.017   0.461   -0.038   0.970
prof.Left_PVis_mask          -0.567   0.497   -1.139   0.255
------------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~Rel_m3Y_pos+graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask+graph.Left_PIns_mask+graph.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~Rel_m3Y_pos+graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask+graph.Left_PIns_mask+graph.Left_PVis_mask"
out_graph_control3 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_control3,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(6) = 13.089, p = 0.042
Pseudo-R² (Cragg-Uhler) = 0.343
Pseudo-R² (McFadden) = 0.215
AIC = 61.908, BIC = 74.398 

Standard errors: MLE
-------------------------------------------------------------
                                Est.    S.E.   z val.       p
--------------------------- -------- ------- -------- -------
(Intercept)                   -0.679   0.642   -1.057   0.291
Rel_m3Y_pos                    1.069   0.855    1.250   0.211
graph.Bilat_NAcc_mask          2.153   0.900    2.391   0.017
graph.Bilat_MPFC_mask         -0.757   0.599   -1.265   0.206
graph.Bilat_AIns_mask         -0.342   0.762   -0.449   0.653
graph.Left_PIns_mask          -0.275   0.472   -0.583   0.560
graph.Left_PVis_mask           0.225   0.530    0.424   0.672
-------------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~f_s+f_ebit+f_ebitmargin+f_roe+f_roic+fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask+fund.Left_PIns_mask+fund.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~f_s+f_ebit+f_ebitmargin+f_roe+f_roic+fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask+fund.Left_PIns_mask+fund.Left_PVis_mask"
out_fund_control3 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_control3,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(10) = 14.925, p = 0.135
Pseudo-R² (Cragg-Uhler) = 0.384
Pseudo-R² (McFadden) = 0.245
AIC = 68.072, BIC = 87.698 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                   0.024   0.370    0.066   0.947
f_s                          -0.294   0.506   -0.581   0.561
f_ebit                        0.179   0.451    0.398   0.690
f_ebitmargin                  0.767   0.600    1.278   0.201
f_roe                        -0.604   0.872   -0.693   0.488
f_roic                        0.103   0.634    0.163   0.871
fund.Bilat_NAcc_mask          2.183   0.946    2.308   0.021
fund.Bilat_MPFC_mask          0.393   0.597    0.659   0.510
fund.Bilat_AIns_mask         -1.651   0.926   -1.782   0.075
fund.Left_PIns_mask          -1.620   0.669   -2.420   0.016
fund.Left_PVis_mask           0.893   0.549    1.626   0.104
------------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~rv_evs+rv_ebitda+rv_pe+rv_pb+rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask+rela.Left_PIns_mask+rela.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~rv_evs+rv_ebitda+rv_pe+rv_pb+rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask+rela.Left_PIns_mask+rela.Left_PVis_mask"
out_rela_control3 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_control3,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(9) = 10.149, p = 0.339
Pseudo-R² (Cragg-Uhler) = 0.275
Pseudo-R² (McFadden) = 0.166
AIC = 70.848, BIC = 88.690 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                   0.102   0.437    0.233   0.816
rv_evs                        0.262   0.647    0.406   0.685
rv_ebitda                     1.199   0.747    1.605   0.108
rv_pe                        -0.729   0.699   -1.043   0.297
rv_pb                         0.703   1.769    0.397   0.691
rela.Bilat_NAcc_mask          0.865   0.613    1.410   0.159
rela.Bilat_MPFC_mask          0.832   0.610    1.363   0.173
rela.Bilat_AIns_mask         -0.852   0.588   -1.448   0.148
rela.Left_PIns_mask          -0.799   0.632   -1.265   0.206
rela.Left_PVis_mask           0.069   0.494    0.140   0.888
------------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
formula=paste("Rel_p1Y_pos~news_incgr+news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask+news.Left_PIns_mask+news.Left_PVis_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~news_incgr+news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask+news.Left_PIns_mask+news.Left_PVis_mask"
out_news_control3 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_control3,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(6) = 4.569, p = 0.600
Pseudo-R² (Cragg-Uhler) = 0.131
Pseudo-R² (McFadden) = 0.075
AIC = 70.428, BIC = 82.918 

Standard errors: MLE
------------------------------------------------------------
                               Est.    S.E.   z val.       p
-------------------------- -------- ------- -------- -------
(Intercept)                   0.159   0.400    0.399   0.690
news_incgr                    1.241   1.523    0.815   0.415
news.Bilat_NAcc_mask         -0.280   0.505   -0.555   0.579
news.Bilat_MPFC_mask          0.359   0.531    0.677   0.498
news.Bilat_AIns_mask          0.449   0.505    0.888   0.375
news.Left_PIns_mask          -0.297   0.442   -0.671   0.502
news.Left_PVis_mask           0.002   0.375    0.006   0.995
------------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.
coef_names <- c("Current Market Capitalization" = "log_CMC","Profile - PVis" = "prof.Left_PVis_mask","Profile - PIns" = "prof.Left_PIns_mask","Profile - NAcc" = "prof.Bilat_NAcc_mask", "Profile - MPFC" = "prof.Bilat_MPFC_mask", "Profile - AIns" = "prof.Bilat_AIns_mask",
                "36M Performance"='Rel_m3Y_pos',"Graph - PVis" = "graph.Left_PVis_mask","Graph - PIns" = "graph.Left_PIns_mask","Graph - NAcc" = "graph.Bilat_NAcc_mask", "Graph - MPFC" = "graph.Bilat_MPFC_mask", "Graph - AIns" = "graph.Bilat_AIns_mask",
                "Sales" ="f_s", "EBIT" = "f_ebit", "EBIT Margin (%)" = "f_ebitmargin","ROE (%)" = "f_roe","ROIC (%)" = "f_roic",
                "Fundamentals - PVis" = "fund.Left_PVis_mask","Fundamentals - PIns" = "fund.Left_PIns_mask","Fundamentals - NAcc" = "fund.Bilat_NAcc_mask", "Fundamentals - MPFC" = "fund.Bilat_MPFC_mask", "Fundamentals - AIns" = "fund.Bilat_AIns_mask",
                "EV/Sales" = "rv_evs","EV/EBITDA" = "rv_ebitda","P/E" = "rv_pe","P/B" = "rv_pb",
                "Relative Valuation - PVis" = "rela.Left_PVis_mask","Relative Valuation - PIns" = "rela.Left_PIns_mask","Relative Valuation - NAcc" = "rela.Bilat_NAcc_mask", "Relative Valuation - MPFC" = "rela.Bilat_MPFC_mask", "Relative Valuation - AIns" = "rela.Bilat_AIns_mask",
                "Income growth" = "news_incgr", "News item - PVis" = "news.Left_PVis_mask","News item - PIns" = "news.Left_PIns_mask","News item - NAcc" = "news.Bilat_NAcc_mask", "News item - MPFC" = "news.Bilat_MPFC_mask", "News item - AIns" = "news.Bilat_AIns_mask"
                )

# Uncomment to create table
#quick_docx(export_summs(out_prof_control3,out_graph_control3,out_fund_control3,out_rela_control3,out_news_control3, scale = TRUE,coefs = coef_names,error_pos = c("same"),
#             model.names=c("Company Profile","Price Graph","Fundamentals","Relative Valuation","News Item"),
#             statistics= c(N = "nobs", AIC = "AIC", "Pseudo R2 (McFadden)" = "pseudo.r.squared.mcfadden")), file='Table-S4.docx')

Testing whether there is a true relationship between PIns activity at the fundamentals screen and future stock performance:

formula=paste("Rel_p1Y_pos~fund.Left_PIns_mask",sep="")
print(formula)
[1] "Rel_p1Y_pos~fund.Left_PIns_mask"
out_fund_pins <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_pins,digits = getOption("jtools-digits", default = 3),scale=TRUE))
MODEL INFO:
Observations: 44
Dependent Variable: Rel_p1Y_pos
Type: Generalized linear model
  Family: binomial 
  Link function: logit 

MODEL FIT:
χ²(1) = 0.535, p = 0.464
Pseudo-R² (Cragg-Uhler) = 0.016
Pseudo-R² (McFadden) = 0.009
AIC = 64.462, BIC = 68.030 

Standard errors: MLE
-----------------------------------------------------------
                              Est.    S.E.   z val.       p
------------------------- -------- ------- -------- -------
(Intercept)                  0.000   0.303    0.000   1.000
fund.Left_PIns_mask         -0.225   0.310   -0.726   0.468
-----------------------------------------------------------

Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.

Compare models:

anova(out_prof, out_prof_control3, test = "LRT")
Analysis of Deviance Table

Model 1: Rel_p1Y_pos ~ log_CMC + prof.Bilat_NAcc_mask + prof.Bilat_MPFC_mask + 
    prof.Bilat_AIns_mask
Model 2: Rel_p1Y_pos ~ log_CMC + prof.Bilat_NAcc_mask + prof.Bilat_MPFC_mask + 
    prof.Bilat_AIns_mask + prof.Left_PIns_mask + prof.Left_PVis_mask
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1        39     50.454                     
2        37     48.513  2   1.9408   0.3789
anova(out_graph, out_graph_control3, test = "LRT")
Analysis of Deviance Table

Model 1: Rel_p1Y_pos ~ Rel_m3Y_pos + graph.Bilat_NAcc_mask + graph.Bilat_MPFC_mask + 
    graph.Bilat_AIns_mask
Model 2: Rel_p1Y_pos ~ Rel_m3Y_pos + graph.Bilat_NAcc_mask + graph.Bilat_MPFC_mask + 
    graph.Bilat_AIns_mask + graph.Left_PIns_mask + graph.Left_PVis_mask
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1        39     48.281                     
2        37     47.908  2  0.37255     0.83
anova(out_fund, out_fund_control3, test = "LRT")
Analysis of Deviance Table

Model 1: Rel_p1Y_pos ~ f_s + f_ebit + f_ebitmargin + f_roe + f_roic + 
    fund.Bilat_NAcc_mask + fund.Bilat_MPFC_mask + fund.Bilat_AIns_mask
Model 2: Rel_p1Y_pos ~ f_s + f_ebit + f_ebitmargin + f_roe + f_roic + 
    fund.Bilat_NAcc_mask + fund.Bilat_MPFC_mask + fund.Bilat_AIns_mask + 
    fund.Left_PIns_mask + fund.Left_PVis_mask
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)  
1        35     54.013                       
2        33     46.072  2   7.9408  0.01887 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
anova(out_rela, out_rela_control3, test = "LRT")
Analysis of Deviance Table

Model 1: Rel_p1Y_pos ~ rv_evs + rv_ebitda + rv_pe + rv_pb + rela.Bilat_NAcc_mask + 
    rela.Bilat_MPFC_mask + rela.Bilat_AIns_mask
Model 2: Rel_p1Y_pos ~ rv_evs + rv_ebitda + rv_pe + rv_pb + rela.Bilat_NAcc_mask + 
    rela.Bilat_MPFC_mask + rela.Bilat_AIns_mask + rela.Left_PIns_mask + 
    rela.Left_PVis_mask
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1        36     52.646                     
2        34     50.848  2   1.7978    0.407
anova(out_news, out_news_control3, test = "LRT")
Analysis of Deviance Table

Model 1: Rel_p1Y_pos ~ news_incgr + news.Bilat_NAcc_mask + news.Bilat_MPFC_mask + 
    news.Bilat_AIns_mask
Model 2: Rel_p1Y_pos ~ news_incgr + news.Bilat_NAcc_mask + news.Bilat_MPFC_mask + 
    news.Bilat_AIns_mask + news.Left_PIns_mask + news.Left_PVis_mask
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1        39     56.896                     
2        37     56.428  2  0.46731   0.7916
---
title: "SPPT analyses - Copy for Figshare"
date: "February 2024"
output: 
  html_notebook:
    toc: yes
---
# 1. General discription

This notebook contains the statistical analyses for the SPPT dataset.

It takes as input the following files:

* Stock_metrics.xlsx: An Excel table containing the stock metrics that are displayed on the information screens of the investment cases. 
* Survey_scores.csv: A .csv table containing participants’ responses to the survey at the end of the experiment.
* NNIPanswers.txt files: These files are archived in Task_answers.zip. The text files contain the predictions and confidence ratings.
* Df_betas_repo.csv: A .csv table containing beta-estimates (brain activity) per VOI, per participant, per information screen. 


For additional details we refer to the README.docx file of the repository, and the paper.

# 2. Import packages and load data

Import packages:
```{r}
library(readr)
library(tidyverse)
library(tibble)
library(dplyr)
library(readxl)
library(jtools)
library(psych)
library(ggplot2)
library(rstatix)
library(huxtable)
library(caret)
```

Load beta estimates:
```{r}
# Load brain data
df_betas <- read_csv("brain/df_betas_repo.csv",col_types = cols())

# Subjects as factor
df_betas$ID <- factor(df_betas$ID)

# Get block + case from trial string
df_betas <- add_column(df_betas, block =sapply(strsplit(as.character(df_betas$case), "\\Case"), `[`, 1))
df_betas$case <- sapply(strsplit(as.character(df_betas$case), "\\Case"), `[`, 2)


# Remove case 51 because of an error with stimulus presentation for this case
remove_case <- c(51)
df_betas <- df_betas[!df_betas$case %in% remove_case, ]
```

Center estimates per participant per VOI:
```{r}
center <- function(x, na.rm = TRUE) (x - mean(x, na.rm = na.rm))

df_betas_centered <- df_betas %>% 
  group_by(ID) %>% 
   mutate_at(vars(matches('mask')), center)
```

Add answers (case, choice, stars - confidence rating):
```{r}
# Load and concatenate data
ff <- list.files("task-answers", pattern = "NNIPanswers.txt", recursive = TRUE, full.names = TRUE)
star_data <- do.call(rbind, lapply(ff, read.delim))

# Add subject number
sub_nr <- rep(2:37, each=45)
sub_nr <- sprintf("%02d", sub_nr)
ID <- paste("sub-",sub_nr, sep="")
star_data <- cbind(ID,star_data)

# Rename and fix columns
names(star_data)[names(star_data) == "Case"] <- "case"
star_data <- star_data[c("ID","case","Stars")]
star_data <- star_data %>% mutate(case = as.character(case))

# Combine data frames
all_data_df <- star_data %>% right_join(df_betas_centered, by=c("ID","case"))
all_data_df <- all_data_df %>% relocate(Stars, .after=last_col())
```

Add stock metrics:

```{r}
# Add case info
Stock_metrics <- read_excel("Stock_metrics.xlsx")

# Convert case to factor
Stock_metrics$case <- factor(Stock_metrics$case)

# Match by case
all_data_df <- cbind(all_data_df, Stock_metrics[match(all_data_df$case, Stock_metrics$case),])
all_data_df <- all_data_df[, !duplicated(colnames(all_data_df))]

# Log-transform CMC because it is highly skewed
all_data_df$log_CMC <- log(all_data_df$CMC)

# Add correct
all_data_df$cor_p1Y <- ifelse(all_data_df$choice==all_data_df$Rel_p1Y_pos, 1, 0) #Correct prediction at 1-year in the future (1 if equal, 0 if not)
```


And Survey scores:
(This also removes subjects 17,18 - these subjects are removed because of excessive head-motion)

```{r}
# Import data
survey_scores <- read_csv("Survey_scores.csv",col_types = cols())

# Join to all_data
all_data_df <- merge(all_data_df,survey_scores, by="ID")
```

Make names unique
```{r}
names(all_data_df)<-make.names(names(all_data_df),unique = TRUE)
```



# 3. Descriptive analyses: Participants

Calculate average per participant

```{r}
Sub_mean <- all_data_df %>%
  group_by(ID) %>%
  summarise_at(vars(-case,-block,-trial), list(~ mean(., na.rm=TRUE)))

#Sub_mean
```

Calculate age and years of experience:

```{r}
x=Sub_mean[50:53]
sapply(x, function(x) c( "Stand dev" = sd(x), 
                         "Mean"= mean(x,na.rm=TRUE),
                         "n" = length(x),
                         "Median" = median(x),
                         "CoeffofVariation" = sd(x)/mean(x,na.rm=TRUE),
                         "Minimum" = min(x),
                         "Maximun" = max(x),
                         "Upper Quantile" = quantile(x,1),
                         "LowerQuartile" = quantile(x,0)
                    )
)
```

Calculate self-reported difficulty/ realism and test if above mid-point of scale:

```{r}
x=Sub_mean[55:56]
sapply(x, function(x) c( "Stand dev" = sd(x), 
                         "Mean"= mean(x,na.rm=TRUE),
                         "n" = length(x),
                         "Median" = median(x),
                         "CoeffofVariation" = sd(x)/mean(x,na.rm=TRUE),
                         "Minimum" = min(x),
                         "Maximun" = max(x),
                         "Upper Quantile" = quantile(x,1),
                         "LowerQuartile" = quantile(x,0)
                    )
)
```

```{r}
t.test(Sub_mean$realistic, mu=4, alternative="two.sided")
```
```{r}
t.test(Sub_mean$difficult, mu=4, alternative="two.sided")
```

Self-reported importance of information screens:

```{r}
x=Sub_mean[61:65]
sapply(x, function(x) c( "Stand dev" = sd(x), 
                         "Mean"= mean(x,na.rm=TRUE),
                         "n" = length(x),
                         "Median" = median(x),
                         "CoeffofVariation" = sd(x)/mean(x,na.rm=TRUE),
                         "Minimum" = min(x),
                         "Maximun" = max(x),
                         "Upper Quantile" = quantile(x,1),
                         "LowerQuartile" = quantile(x,0)
                    )
)
```
Reshape data and conduct pairwise tests:

```{r}
x=Sub_mean[c(1,61:65)]
importance_scores <- pivot_longer(x, cols=-ID,names_to='Screen',values_to='Importance')

pwc <- importance_scores %>%
  pairwise_t_test(
    Importance ~ Screen, paired = TRUE,
    p.adjust.method = "bonferroni"
    )
pwc
```

# 4. Descriptive analyses: Investment cases

Calculate average per case 

```{r}
Case_mean <- all_data_df %>%
  group_by(case) %>%
  summarise_at(vars(-ID,-block,-trial), funs(mean(., na.rm=TRUE)))

Case_mean
```

Average prediction and correct prediction

* choice = Investor's prediction (underperforming/overperforming, coded as 0/1)
* correct = Is the prediction correct or not? (incorrect/correct, coded as 0/1)



```{r}
x=Case_mean[c(32,49)]
sapply(x, function(x) c( "Stand dev" = sd(x), 
                         "Mean"= mean(x,na.rm=TRUE),
                         "n" = length(x),
                         "Median" = median(x),
                         "CoeffofVariation" = sd(x)/mean(x,na.rm=TRUE),
                         "Minimum" = min(x),
                         "Maximun" = max(x),
                         "Upper Quantile" = quantile(x,1),
                         "LowerQuartile" = quantile(x,0)
                    )
)
```

# 5. Statistical analyses: Behavior 

Logistic regression: predictions to future market performance

Rel_p1Y_pos = Relative performance 1 year in the future (underperforming/overperforming, coded as 0/1)

```{r}
formula=paste("Rel_p1Y_pos~choice",sep="")
print(formula)
out_choice <- glm(formula,data=Case_mean,family = binomial(link = logit))

print(summ(out_choice,digits = getOption("jtools-digits", default = 3), scale=TRUE))
```

Correlation between self-reported confidence (Stars) and prediction accuracy

```{r}
cor.test(Case_mean$Stars, Case_mean$cor_p1Y)
```

# 6. Factor analysis - Brain activity

Select columns:

```{r}
nacc <- Case_mean[, c(4,9,14,19,24)]
KMO(r=cor(nacc))
```

Positive correlation accross information screens:

```{r}
median(cor(nacc))
```

Factor analysis:

```{r}
fit <- factanal(nacc, 2,rotation = "promax", scores='regression')

print(fit, digits=3, cutoff=0.3, sort=TRUE)
```

Add factor scores, creating df for regression:

```{r}
NAcc_factor_scores <- data.frame(fit$scores)
reg_df <- merge(Case_mean,NAcc_factor_scores,by="row.names",all.x=TRUE)
```

# 7. Regression analysis: Information screens

Regression analysis per information screen (for Table 2):

```{r}
formula=paste("Rel_p1Y_pos~log_CMC+prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask",sep="")
print(formula)
out_prof <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~Rel_m3Y_pos+graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask",sep="")
print(formula)
out_graph <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~f_s+f_ebit+f_ebitmargin+f_roe+f_roic+fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask",sep="")
print(formula)
out_fund <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~rv_evs+rv_ebitda+rv_pe+rv_pb+rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask",sep="")
print(formula)
out_rela <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~news_incgr+news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask",sep="")
print(formula)
out_news <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news,digits = getOption("jtools-digits", default = 2),scale=TRUE))

```

Output table:

```{r}
coef_names <- c("Current Market Capitalization" = "log_CMC", "Profile - Nacc" = "prof.Bilat_NAcc_mask", "Profile - MPFC" = "prof.Bilat_MPFC_mask", "Profile - AIns" = "prof.Bilat_AIns_mask",
                "36M Performance"='Rel_m3Y_pos', "Graph - Nacc" = "graph.Bilat_NAcc_mask", "Graph - MPFC" = "graph.Bilat_MPFC_mask", "Graph - AIns" = "graph.Bilat_AIns_mask",
                "Sales" ="f_s", "EBIT" = "f_ebit", "EBIT Margin (%)" = "f_ebitmargin","ROE (%)" = "f_roe","ROIC (%)" = "f_roic",
                "Fundamentals - Nacc" = "fund.Bilat_NAcc_mask", "Fundamentals - MPFC" = "fund.Bilat_MPFC_mask", "Fundamentals - AIns" = "fund.Bilat_AIns_mask",
                "EV/Sales" = "rv_evs","EV/EBITDA" = "rv_ebitda","P/E" = "rv_pe","P/B" = "rv_pb",
                "Relative Valuation - Nacc" = "rela.Bilat_NAcc_mask", "Relative Valuation - MPFC" = "rela.Bilat_MPFC_mask", "Relative Valuation - AIns" = "rela.Bilat_AIns_mask",
                "Income growth" = "news_incgr","News item - Nacc" = "news.Bilat_NAcc_mask", "News item - MPFC" = "news.Bilat_MPFC_mask", "News item - AIns" = "news.Bilat_AIns_mask" )

# Uncomment to create table
# quick_docx(export_summs(out_prof,out_graph,out_fund,out_rela,out_news, scale = TRUE,coefs = coef_names,error_pos = c("same"),
#              model.names=c("Company Profile","Price Graph","Fundamentals","Relative Valuation","News Item"),
#              statistics= c(N = "nobs", AIC = "AIC", "Pseudo R2 (McFadden)" = "pseudo.r.squared.mcfadden")), file='Table-2.docx')
```

Only neural predictors:

```{r}
formula=paste("Rel_p1Y_pos~prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask",sep="")
print(formula)
out_prof_neural <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_neural,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask",sep="")
print(formula)
out_graph_neural <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_neural,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask",sep="")
print(formula)
out_fund_neural <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_neural,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask",sep="")
print(formula)
out_rela_neural <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_neural,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask",sep="")
print(formula)
out_news_neural <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_neural,digits = getOption("jtools-digits", default = 2),scale=TRUE))

```


Reduced models per screen with only the NAcc predictor (without MPFC and AIns) plus that screen's stock metrics:

```{r}
formula=paste("Rel_p1Y_pos~log_CMC+prof.Bilat_NAcc_mask",sep="")
print(formula)
out_prof_NAcc <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_NAcc,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~Rel_m3Y_pos+graph.Bilat_NAcc_mask",sep="")
print(formula)
out_graph_NAcc <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_NAcc,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~f_s+f_ebit+f_ebitmargin+f_roe+f_roic+fund.Bilat_NAcc_mask",sep="")
print(formula)
out_fund_NAcc <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_NAcc,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~rv_evs+rv_ebitda+rv_pe+rv_pb+rela.Bilat_NAcc_mask",sep="")
print(formula)
out_rela_NAcc <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_NAcc,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~news_incgr+news.Bilat_NAcc_mask",sep="")
print(formula)
out_news_NAcc <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_NAcc,digits = getOption("jtools-digits", default = 2),scale=TRUE))

```


Reduced models per screen with only the NAcc predictor (without MPFC and AIns) and without stock metrics:

```{r}
formula=paste("Rel_p1Y_pos~prof.Bilat_NAcc_mask",sep="")
print(formula)
out_prof_NAcc2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_NAcc2,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~graph.Bilat_NAcc_mask",sep="")
print(formula)
out_graph_NAcc2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_NAcc2,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~fund.Bilat_NAcc_mask",sep="")
print(formula)
out_fund_NAcc2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_NAcc2,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~rela.Bilat_NAcc_mask",sep="")
print(formula)
out_rela_NAcc2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_NAcc2,digits = getOption("jtools-digits", default = 2),scale=TRUE))

formula=paste("Rel_p1Y_pos~news.Bilat_NAcc_mask",sep="")
print(formula)
out_news_NAcc2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_NAcc2,digits = getOption("jtools-digits", default = 2),scale=TRUE))

```


T-tests comparing NAcc activity between underperforming and overperforming stocks:

```{r}
t.test(prof.Bilat_NAcc_mask~Rel_p1Y_pos, data=reg_df,var.equal=TRUE)

t.test(graph.Bilat_NAcc_mask~Rel_p1Y_pos, data=reg_df,var.equal=TRUE)

t.test(fund.Bilat_NAcc_mask~Rel_p1Y_pos, data=reg_df,var.equal=TRUE)

t.test(rela.Bilat_NAcc_mask~Rel_p1Y_pos, data=reg_df,var.equal=TRUE)

t.test(news.Bilat_NAcc_mask~Rel_p1Y_pos, data=reg_df,var.equal=TRUE)
```
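
As a complement to these t-tests, the corresponding effect sizes could be computed with `cohens_d()` from the `rstatix` package; a minimal sketch for the profile and graph screens (the ones annotated in the plot below), assuming `reg_df` as constructed above (the temporary `effsize_df` only converts the grouping variable to a factor):

```{r}
# Sketch: Cohen's d for NAcc activity split by future stock performance
# var.equal = TRUE to match the t-tests above
effsize_df <- reg_df %>% mutate(Rel_p1Y_pos = factor(Rel_p1Y_pos))

cohens_d(effsize_df, prof.Bilat_NAcc_mask ~ Rel_p1Y_pos, var.equal = TRUE)
cohens_d(effsize_df, graph.Bilat_NAcc_mask ~ Rel_p1Y_pos, var.equal = TRUE)
```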

# 7. Create NAcc plot

Create the data frame for the NAcc plot:

```{r}
# Standard error of the mean
std <- function(x) sd(x)/sqrt(length(x))

# Select (by column position) Rel_p1Y_pos and the NAcc estimates for the five screens
x = Case_mean[c(19,9,4,24,14,35)]

AvgBeta_group = x %>% group_by(Performance = Rel_p1Y_pos) %>%
  summarise("Company Profile" = mean(prof.Bilat_NAcc_mask), 
            "Price Graph" = mean(graph.Bilat_NAcc_mask),
            "Fundamentals" = mean(fund.Bilat_NAcc_mask),
            "Relative Valuation" = mean(rela.Bilat_NAcc_mask),
            "News Item" = mean(news.Bilat_NAcc_mask))

SE_group = x %>% group_by(Performance = Rel_p1Y_pos) %>%
  summarise("Company Profile" = std(prof.Bilat_NAcc_mask), 
            "Price Graph" = std(graph.Bilat_NAcc_mask),
            "Fundamentals" = std(fund.Bilat_NAcc_mask),
            "Relative Valuation" = std(rela.Bilat_NAcc_mask),
            "News Item" = std(news.Bilat_NAcc_mask))

# Make Average dataframe
AvgBeta=AvgBeta_group
AvgBeta <- pivot_longer(AvgBeta, cols=-Performance,names_to='Screen',values_to='AvgBeta')

# Make SE dataframe
SE=SE_group
SE <- pivot_longer(SE, cols=-Performance,names_to='Screen',values_to='SE')

# Combine Average and SE
NAccplot_df <- AvgBeta %>% right_join(SE, by=c("Performance","Screen"))

#NAccplot_df
            
```

Create the NAcc plot:

```{r}
NAccplot = NAccplot_df

NAccplot$Performance <- factor(NAccplot$Performance)
NAccplot$Screen <- factor(NAccplot$Screen, levels=unique(NAccplot$Screen), ordered=TRUE)


plot <- ggplot(NAccplot, aes(x=Screen, y=AvgBeta, fill=Performance)) +
    geom_bar(stat='identity', position = position_dodge2(padding = 0)) +
    geom_errorbar(aes(ymin=AvgBeta-SE, ymax=AvgBeta+SE), width=.2, position=position_dodge(0.9))+
    geom_hline(yintercept=0)+
    labs(x = "Information screen", fill="Future stock performance",y=expression(paste("Average ", beta, "-estimate (centered)")))+

    scale_fill_manual(labels = c("Underperforming", "Overperforming"), values = c("#8D8D8D", "#2070B4"))+

    annotate("text", x = 1, y = 0.06, label ="paste(italic(p), \" = 0.005\")",parse=TRUE,size=6)+ # p-value from t-test above
    annotate("text", x = 2, y = 0.06, label ="paste(italic(p), \" = 0.008\")",parse=TRUE,size=6)+ # p-value from t-test above
    theme_classic()+
    scale_y_continuous(expand = expansion(mult = c(0, 0)),limits = c(-0.07, 0.07),breaks=c(-0.06,-0.04,-0.02,0,0.02,0.04,0.06)) +
    theme(legend.position="bottom", legend.box = "horizontal",text = element_text(size = 28,family="Arial"),
          plot.title = element_text(hjust = 0.5))

 
#1. Open jpeg file
jpeg("NAcc_plot.jpg", width = 1200, height = 600)
#2. Create the plot
plot
#3. Close the file
dev.off()
```
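
Alternatively, the same figure could be written with `ggsave()`; a minimal sketch assuming the same 1200 x 600 pixel output (12 x 6 inches at 100 dpi):

```{r}
# Uncomment to write the file via ggsave instead of the jpeg device
# ggsave("NAcc_plot.jpg", plot = plot, width = 12, height = 6, units = "in", dpi = 100)
```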


# 8. Regression analysis: Stock performance inflection

```{r}
# Create stock performance inflection variable:
# 1 if past (36M) and future (1Y) relative performance differ (i.e., performance flips), 0 otherwise
reg_df$Inflection <- ifelse(reg_df$Rel_m3Y_pos==reg_df$Rel_p1Y_pos, 0, 1)


formula=paste("Inflection~log_CMC+prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask",sep="")
print(formula)
out_prof_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_inflection,digits = getOption("jtools-digits", default = 3)))

print("No relationship between MPFC activity at profile screen and inflection:")

formula=paste("Inflection~prof.Bilat_MPFC_mask",sep="")
print(formula)
out_prof_mpfc_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_mpfc_inflection,digits = getOption("jtools-digits", default = 3)))

formula=paste("Inflection~graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask",sep="")
print(formula)
out_graph_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_inflection,digits = getOption("jtools-digits", default = 3)))

formula=paste("Inflection~fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask",sep="")
print(formula)
out_fund_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_inflection,digits = getOption("jtools-digits", default = 3)))

formula=paste("Inflection~rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask",sep="")
print(formula)
out_rela_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_inflection,digits = getOption("jtools-digits", default = 3)))

formula=paste("Inflection~news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask",sep="")
print(formula)
out_news_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_inflection,digits = getOption("jtools-digits", default = 3)))

print("No relationship between MPFC activity at news screen and inflection:")

formula=paste("Inflection~news.Bilat_MPFC_mask",sep="")
print(formula)
out_news_mpfc_inflection <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_mpfc_inflection,digits = getOption("jtools-digits", default = 3)))

```

# 9. Regression models: NAcc factor

```{r}
formula=paste("Rel_p1Y_pos~Factor1",sep="")
print(formula)
out_factor1 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_factor1,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~Factor2",sep="")
print(formula)
out_factor2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_factor2,digits = getOption("jtools-digits", default = 3),scale=TRUE))
```


# 10. Regression models: Market, Market + Behavior, Market + Behavior + Brain

```{r}
formula=paste("Rel_p1Y_pos~log_CMC+Rel_m3Y_pos+f_s+f_ebit+f_ebitmargin+f_roe+f_roic+rv_evs+rv_ebitda+rv_pe+rv_pb+news_incgr",sep="")
print(formula)
out_market <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_market,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~log_CMC+Rel_m3Y_pos+f_s+f_ebit+f_ebitmargin+f_roe+f_roic+rv_evs+rv_ebitda+rv_pe+rv_pb+news_incgr+choice",sep="")
print(formula)
out_market_choice <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_market_choice,digits = getOption("jtools-digits", default = 3),scale=TRUE))


formula=paste("Rel_p1Y_pos~log_CMC+Rel_m3Y_pos+f_s+f_ebit+f_ebitmargin+f_roe+f_roic+rv_evs+rv_ebitda+rv_pe+rv_pb+news_incgr+choice+Factor1",sep="")
print(formula)
out_all <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_all,digits = getOption("jtools-digits", default = 3),scale=TRUE))
```

Create Table 3:

```{r}
coef_names <- c("Current Market Capitalization" = "log_CMC",
                "36M Performance"='Rel_m3Y_pos',
                "Sales" ="f_s", "EBIT" = "f_ebit", "EBIT Margin (%)" = "f_ebitmargin","ROE (%)" = "f_roe","ROIC (%)" = "f_roic",
                "EV/Sales" = "rv_evs","EV/EBITDA" = "rv_ebitda","P/E" = "rv_pe","P/B" = "rv_pb",
               
                "Income growth" = "news_incgr","Prediction" = "choice","NAcc - Profile, Graph" = "Factor1")
# Uncomment to create table
#quick_docx(export_summs(out_market,out_market_choice,out_all, scale = TRUE,coefs = coef_names,error_pos = c("same"),
#             model.names=c("Market","Market + Behavior","Market + Behavior + Brain"),
#             statistics= c(N = "nobs", AIC = "AIC", "Pseudo R2 (McFadden)" = "pseudo.r.squared.mcfadden")),file='Table-3.docx')
```

Control model: stock metric rv_pe (P/E) alone:

```{r}
formula=paste("Rel_p1Y_pos~rv_pe",sep="")
print(formula)
out_rvpe <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rvpe,digits = getOption("jtools-digits", default = 3),scale=TRUE))
```

Model comparison (likelihood-ratio tests):

```{r}
anova(out_market, out_all, test = "LRT")

anova(out_market_choice, out_all, test = "LRT")
```
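
For reference, the McFadden pseudo-R2 reported in the regression tables can also be computed directly as one minus the ratio of the model's log-likelihood to that of an intercept-only model; a minimal sketch for the full model (the name `out_null` is only illustrative):

```{r}
# Sketch: McFadden's pseudo-R2 = 1 - logLik(model) / logLik(intercept-only model)
# The null model is fitted on the same observations as the full model
out_null <- glm(Rel_p1Y_pos ~ 1, data = model.frame(out_all), family = binomial(link = logit))

1 - as.numeric(logLik(out_all)) / as.numeric(logLik(out_null))
```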


# 11. Prediction analysis

Set input for prediction analysis:

```{r}
crossval_df = reg_df[, c("Rel_p1Y_pos",
                         "choice",
                         "log_CMC","Rel_m3Y_pos","f_s","f_ebit","f_ebitmargin","f_roe","f_roic","rv_evs","rv_ebitda","rv_pe","rv_pb","news_incgr",
                         "Factor1")]

crossval_df$Rel_m3Y_pos <- as.factor(crossval_df$Rel_m3Y_pos)
crossval_df$Rel_p1Y_pos <- as.factor(crossval_df$Rel_p1Y_pos)

crossval_df
```
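
Since the folds below are stratified on the outcome (`createFolds()` is called on `Rel_p1Y_pos`), it can be useful to check its class balance first; a minimal check:

```{r}
# Class balance of the outcome used to stratify the cross-validation folds
table(crossval_df$Rel_p1Y_pos)
round(prop.table(table(crossval_df$Rel_p1Y_pos)), 2)
```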


K-fold cross-validation with the `caret` package:

K-fold: Brain (NAcc)

```{r}
# Predictor sets: brain (NAcc factor), behavior (choice), and market (stock metrics)
braindat  = crossval_df[, c("Rel_p1Y_pos", "Factor1")]
choicedat = crossval_df[, c("Rel_p1Y_pos", "choice")]
marketdat = crossval_df[, -c(2, 15)]  # drop choice and Factor1, keeping the stock metrics

set.seed(0)

# Stratified k-fold cross-validation of a logistic regression; returns the confusion
# matrix of the held-out predictions pooled over all folds
kfold = function(data, nfolds){
  folds <- createFolds(factor(data$Rel_p1Y_pos), nfolds, returnTrain = TRUE)

  train_model <- trainControl(method = "cv", number = nfolds, index = folds, savePredictions = TRUE)

  # Model
  model <- train(
    Rel_p1Y_pos ~ .,
    data = data,
    method = "glm", # logistic regression
    family = "binomial",
    trControl = train_model)

  return(confusionMatrix(model$pred$pred, model$pred$obs, positive = "1"))
}

print(kfold(braindat,5))
```
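
If fold-wise performance is of interest, the fitted `caret` model also stores per-fold accuracy and kappa in its `$resample` element; a sketch of a small variant of `kfold()` that returns these instead (the helper name `kfold_resamples` is illustrative):

```{r}
# Sketch: same cross-validation as kfold(), but returning fold-wise performance
kfold_resamples <- function(data, nfolds){
  folds <- createFolds(factor(data$Rel_p1Y_pos), nfolds, returnTrain = TRUE)
  train_model <- trainControl(method = "cv", number = nfolds, index = folds, savePredictions = TRUE)
  model <- train(Rel_p1Y_pos ~ ., data = data, method = "glm",
                 family = "binomial", trControl = train_model)
  model$resample  # one row per fold: Accuracy, Kappa, Resample
}

# kfold_resamples(braindat, 5)
```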


K-fold: Behavior (choice)

```{r}
print(kfold(choicedat,5))
```

K-fold: Market (Stock metrics)

```{r}
print(kfold(marketdat,5))
```

Leave-one-out cross-validation analyses:

Leave-one-out: Brain

```{r}
set.seed(0)
# LOOCV
train_model<- trainControl(method="loocv", savePredictions = TRUE)

# Model
model <- train(
    Rel_p1Y_pos ~ ., 
    data = braindat, 
    method = "glm", # logistic regression
    family = "binomial",
    trControl = train_model)

model #to print model result

confusionMatrix(model$pred$pred,model$pred$obs,positive="1")
```

Leave-one-out: Behavior (choice)

```{r}
set.seed(0)
# LOOCV
train_model<- trainControl(method="loocv", savePredictions = TRUE)

# Model
model <- train(
    Rel_p1Y_pos ~ ., 
    data = choicedat, 
    method = "glm", # logistic regression
    family = "binomial",
    trControl = train_model)

model #to print model result

confusionMatrix(model$pred$pred,model$pred$obs,positive="1")
```

Leave-one-out: Market (Stock metrics)

```{r}
set.seed(0)
# LOOCV
train_model<- trainControl(method="loocv", savePredictions = TRUE)

# Model
model <- train(
    Rel_p1Y_pos ~ ., 
    data = marketdat, 
    method = "glm", # logistic regression
    family = "binomial",
    trControl = train_model)

model #to print model result

confusionMatrix(model$pred$pred,model$pred$obs,positive="1")
```



# S. Supplementary material: Appendix 4

Table S1: Logistic regression models for the control regions:

```{r}
formula=paste("Rel_p1Y_pos~prof.Left_PIns_mask+prof.Left_PVis_mask",sep="")
print(formula)
out_prof_control <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_control,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~graph.Left_PIns_mask+graph.Left_PVis_mask",sep="")
print(formula)
out_graph_control <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_control,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~fund.Left_PIns_mask+fund.Left_PVis_mask",sep="")
print(formula)
out_fund_control <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_control,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~rela.Left_PIns_mask+rela.Left_PVis_mask",sep="")
print(formula)
out_rela_control <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_control,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~news.Left_PIns_mask+news.Left_PVis_mask",sep="")
print(formula)
out_news_control <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_control,digits = getOption("jtools-digits", default = 3),scale=TRUE))

```

```{r}
coef_names <- c("Profile - PVis" = "prof.Left_PVis_mask","Profile - PIns" = "prof.Left_PIns_mask",
                "Graph - PVis" = "graph.Left_PVis_mask","Graph - PIns" = "graph.Left_PIns_mask",
                "Fundamentals - PVis" = "fund.Left_PVis_mask","Fundamentals - PIns" = "fund.Left_PIns_mask",
                "Relative Valuation - PVis" = "rela.Left_PVis_mask","Relative Valuation - PIns" = "rela.Left_PIns_mask",
                "News Item - PVis" = "news.Left_PVis_mask","News Item - PIns" = "news.Left_PIns_mask")

# Uncomment to create table
#quick_docx(export_summs(out_prof_control,out_graph_control,out_fund_control,out_rela_control,out_news_control, scale = TRUE,coefs = coef_names,error_pos = c("same"),
#             model.names=c("Company Profile","Price Graph","Fundamentals","Relative Valuation","News Item"),
#             statistics= c(N = "nobs", AIC = "AIC", "Pseudo R2 (McFadden)" = "pseudo.r.squared.mcfadden")), file='Table-S1.docx')
```



Table S2: Correlation matrix (averaged over the five screens):

```{r}
# For each screen, select (by column position) the five region estimates (NAcc, MPFC, AIns, PIns, PVis)
# and strip the screen prefix from the column names
prof_data <- reg_df[, c(18,19,20,21,22)]
names(prof_data) <- substring(names(prof_data), 6)

graph_data <- reg_df[, c(8,9,10,11,12)]
names(graph_data) <- substring(names(graph_data), 7)

fund_data <- reg_df[, c(3,4,5,6,7)]
names(fund_data) <- substring(names(fund_data), 6)

rela_data <- reg_df[, c(23,24,25,26,27)]
names(rela_data) <- substring(names(rela_data), 6)

news_data <- reg_df[, c(13,14,15,16,17)]
names(news_data) <- substring(names(news_data), 6)


res_prof <- cor(prof_data)

res_graph <-cor(graph_data)

res_fund <-cor(fund_data)

res_rela <-cor(rela_data)

res_news <-cor(news_data)

cor_matrices <- list(res_prof, res_graph, res_fund, res_rela, res_news)

# Element-wise average of the five screen-specific correlation matrices
Reduce("+", cor_matrices) / length(cor_matrices)

```

Table S3: VOIs and control regions:


```{r}
formula=paste("Rel_p1Y_pos~prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask+prof.Left_PIns_mask+prof.Left_PVis_mask",sep="")
print(formula)
out_prof_control2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_control2,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask+graph.Left_PIns_mask+graph.Left_PVis_mask",sep="")
print(formula)
out_graph_control2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_control2,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask+fund.Left_PIns_mask+fund.Left_PVis_mask",sep="")
print(formula)
out_fund_control2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_control2,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask+rela.Left_PIns_mask+rela.Left_PVis_mask",sep="")
print(formula)
out_rela_control2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_control2,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask+news.Left_PIns_mask+news.Left_PVis_mask",sep="")
print(formula)
out_news_control2 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_control2,digits = getOption("jtools-digits", default = 3),scale=TRUE))

```


```{r}
coef_names <- c("Profile - PVis" = "prof.Left_PVis_mask","Profile - PIns" = "prof.Left_PIns_mask","Profile - NAcc" = "prof.Bilat_NAcc_mask", "Profile - MPFC" = "prof.Bilat_MPFC_mask", "Profile - AIns" = "prof.Bilat_AIns_mask",
                "Graph - PVis" = "graph.Left_PVis_mask","Graph - PIns" = "graph.Left_PIns_mask","Graph - NAcc" = "graph.Bilat_NAcc_mask", "Graph - MPFC" = "graph.Bilat_MPFC_mask", "Graph - AIns" = "graph.Bilat_AIns_mask",
                "Fundamentals - PVis" = "fund.Left_PVis_mask","Fundamentals - PIns" = "fund.Left_PIns_mask","Fundamentals - NAcc" = "fund.Bilat_NAcc_mask", "Fundamentals - MPFC" = "fund.Bilat_MPFC_mask", "Fundamentals - AIns" = "fund.Bilat_AIns_mask",
                "Relative Valuation - PVis" = "rela.Left_PVis_mask","Relative Valuation - PIns" = "rela.Left_PIns_mask","Relative Valuation - NAcc" = "rela.Bilat_NAcc_mask", "Relative Valuation - MPFC" = "rela.Bilat_MPFC_mask", "Relative Valuation - AIns" = "rela.Bilat_AIns_mask",
                "News item - PVis" = "news.Left_PVis_mask","News item - PIns" = "news.Left_PIns_mask","News item - NAcc" = "news.Bilat_NAcc_mask", "News item - MPFC" = "news.Bilat_MPFC_mask", "News item - AIns" = "news.Bilat_AIns_mask" )

# Uncomment to create table
#quick_docx(export_summs(out_prof_control2,out_graph_control2,out_fund_control2,out_rela_control2,out_news_control2, scale = TRUE,coefs = coef_names,error_pos = c("same"),
#             model.names=c("Company Profile","Price Graph","Fundamentals","Relative Valuation","News Item"),
#             statistics= c(N = "nobs", AIC = "AIC", "Pseudo R2 (McFadden)" = "pseudo.r.squared.mcfadden")), file='Table-S3.docx')
```


Table S4: VOIs, control regions, and stock metrics:


```{r}
formula=paste("Rel_p1Y_pos~log_CMC+prof.Bilat_NAcc_mask+prof.Bilat_MPFC_mask+prof.Bilat_AIns_mask+prof.Left_PIns_mask+prof.Left_PVis_mask",sep="")
print(formula)
out_prof_control3 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_prof_control3,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~Rel_m3Y_pos+graph.Bilat_NAcc_mask+graph.Bilat_MPFC_mask+graph.Bilat_AIns_mask+graph.Left_PIns_mask+graph.Left_PVis_mask",sep="")
print(formula)
out_graph_control3 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_graph_control3,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~f_s+f_ebit+f_ebitmargin+f_roe+f_roic+fund.Bilat_NAcc_mask+fund.Bilat_MPFC_mask+fund.Bilat_AIns_mask+fund.Left_PIns_mask+fund.Left_PVis_mask",sep="")
print(formula)
out_fund_control3 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_control3,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~rv_evs+rv_ebitda+rv_pe+rv_pb+rela.Bilat_NAcc_mask+rela.Bilat_MPFC_mask+rela.Bilat_AIns_mask+rela.Left_PIns_mask+rela.Left_PVis_mask",sep="")
print(formula)
out_rela_control3 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_rela_control3,digits = getOption("jtools-digits", default = 3),scale=TRUE))

formula=paste("Rel_p1Y_pos~news_incgr+news.Bilat_NAcc_mask+news.Bilat_MPFC_mask+news.Bilat_AIns_mask+news.Left_PIns_mask+news.Left_PVis_mask",sep="")
print(formula)
out_news_control3 <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_news_control3,digits = getOption("jtools-digits", default = 3),scale=TRUE))

```


```{r}
coef_names <- c("Current Market Capitalization" = "log_CMC","Profile - PVis" = "prof.Left_PVis_mask","Profile - PIns" = "prof.Left_PIns_mask","Profile - NAcc" = "prof.Bilat_NAcc_mask", "Profile - MPFC" = "prof.Bilat_MPFC_mask", "Profile - AIns" = "prof.Bilat_AIns_mask",
                "36M Performance"='Rel_m3Y_pos',"Graph - PVis" = "graph.Left_PVis_mask","Graph - PIns" = "graph.Left_PIns_mask","Graph - NAcc" = "graph.Bilat_NAcc_mask", "Graph - MPFC" = "graph.Bilat_MPFC_mask", "Graph - AIns" = "graph.Bilat_AIns_mask",
                "Sales" ="f_s", "EBIT" = "f_ebit", "EBIT Margin (%)" = "f_ebitmargin","ROE (%)" = "f_roe","ROIC (%)" = "f_roic",
                "Fundamentals - PVis" = "fund.Left_PVis_mask","Fundamentals - PIns" = "fund.Left_PIns_mask","Fundamentals - NAcc" = "fund.Bilat_NAcc_mask", "Fundamentals - MPFC" = "fund.Bilat_MPFC_mask", "Fundamentals - AIns" = "fund.Bilat_AIns_mask",
                "EV/Sales" = "rv_evs","EV/EBITDA" = "rv_ebitda","P/E" = "rv_pe","P/B" = "rv_pb",
                "Relative Valuation - PVis" = "rela.Left_PVis_mask","Relative Valuation - PIns" = "rela.Left_PIns_mask","Relative Valuation - NAcc" = "rela.Bilat_NAcc_mask", "Relative Valuation - MPFC" = "rela.Bilat_MPFC_mask", "Relative Valuation - AIns" = "rela.Bilat_AIns_mask",
                "Income growth" = "news_incgr", "News item - PVis" = "news.Left_PVis_mask","News item - PIns" = "news.Left_PIns_mask","News item - NAcc" = "news.Bilat_NAcc_mask", "News item - MPFC" = "news.Bilat_MPFC_mask", "News item - AIns" = "news.Bilat_AIns_mask"
                )

# Uncomment to create table
#quick_docx(export_summs(out_prof_control3,out_graph_control3,out_fund_control3,out_rela_control3,out_news_control3, scale = TRUE,coefs = coef_names,error_pos = c("same"),
#             model.names=c("Company Profile","Price Graph","Fundamentals","Relative Valuation","News Item"),
#             statistics= c(N = "nobs", AIC = "AIC", "Pseudo R2 (McFadden)" = "pseudo.r.squared.mcfadden")), file='Table-S4.docx')
```


Testing whether there is a genuine relationship between PIns activity at the fundamentals screen and future stock performance:

```{r}
formula=paste("Rel_p1Y_pos~fund.Left_PIns_mask",sep="")
print(formula)

out_fund_pins <- glm(formula,data=reg_df,family = binomial(link = logit))

print(summ(out_fund_pins,digits = getOption("jtools-digits", default = 3),scale=TRUE))
```


Compare models: for each screen, a likelihood-ratio test of the Table 2 model against the corresponding Table S4 model that adds the control regions:


```{r}
anova(out_prof, out_prof_control3, test = "LRT")
```

```{r}
anova(out_graph, out_graph_control3, test = "LRT")
```

```{r}
anova(out_fund, out_fund_control3, test = "LRT")
```

```{r}
anova(out_rela, out_rela_control3, test = "LRT")
```

```{r}
anova(out_news, out_news_control3, test = "LRT")
```


