# SIENA tutorial using R for the Social Networks and Health
# workshop at Duke University on May 26, 2017
# Adapted from tutorial by Christian Steglich
# http://www.gmw.rug.nl/~steglich/workshops/Groningen2015.htm
##############################################
# Contents:
# (1) preparation
# (2) inspect data
# (3) preparing objects for RSiena
# (4) model specification
# (5) model estimation
# (6) goodness of fit
# (7) time heterogeneity
# (8) interpretation
# (9) simulations
##############################################
# ==========================
# (1) preparatory steps
# ==========================
# install the RSiena package
install.packages("RSiena")
# this is an older version but it is simple to download
# the full version is here, but it is incompatible with R 3.4
# and requires compilation for macs. But if you insist, un-hash and run
# install.packages("RSiena", repos="http://R-Forge.R-project.org")
# answer "y" to the question of whether you want to install from source
# optionally, install the RSienaTest package
# install.packages("RSienaTest", repos="http://R-Forge.R-project.org")
# load RSiena commands and sna to the R namespace
library(RSiena)
library(sna)
# install.packages("lattice")
library(lattice) # for plotting
# check your version # lastest version 1.1-304
packageVersion("RSiena")
# set working directory to where you want to store output files:
#setwd("/Users/davidschaefer/Dropbox (ASU)/ASU/Teaching/Duke/lab/output/")
# Create a series of functions that will help interpret model performance & output
igraphNetworkExtraction <- function(i, data, sims, period, groupName, varName){
require(igraph)
dimsOfDepVar<- attr(data[[groupName]]$depvars[[varName]], "netdims")
missings <- is.na(data[[groupName]]$depvars[[varName]][,,period]) |
is.na(data[[groupName]]$depvars[[varName]][,,period+1])
if (is.null(i)) {
# sienaGOF wants the observation:
original <- data[[groupName]]$depvars[[varName]][,,period+1]
original[missings] <- 0
returnValue <- graph.adjacency(original)
}
else
{
missings <- graph.adjacency(missings)
#sienaGOF wants the i-th simulation:
returnValue <- graph.difference(
graph.empty(dimsOfDepVar) +
edges(t(sims[[i]][[groupName]][[varName]][[period]][,1:2])),
missings)
}
returnValue
}
GeodesicDistribution <- function (i, data, sims, period, groupName,
varName, levls=c(1:5,Inf), cumulative=TRUE, ...) {
x <- networkExtraction(i, data, sims, period, groupName, varName)
require(sna)
a <- sna::geodist(symmetrize(x))$gdist
if (cumulative)
{
gdi <- sapply(levls, function(i){ sum(a<=i) })
}
else
{
gdi <- sapply(levls, function(i){ sum(a==i) })
}
names(gdi) <- as.character(levls)
gdi
}
# Holland and Leinhardt Triad Census; see ?sna::triad.census.
TriadCensus <- function(i, data, sims, wave, groupName, varName, levls=1:16){
unloadNamespace("igraph") # to avoid package clashes
require(sna)
require(network)
x <- networkExtraction(i, data, sims, wave, groupName, varName)
if (network.edgecount(x) <= 0){x <- symmetrize(x)}
# because else triad.census(x) will lead to an error
tc <- sna::triad.census(x)[1,levls]
# names are transferred automatically
tc
}
# Distribution of Bonacich eigenvalue centrality; see ?igraph::evcent.
EigenvalueDistribution <- function (i, data, sims, period, groupName, varName,
levls=c(seq(0,1,by=0.125)), cumulative=TRUE){
require(igraph)
x <- igraphNetworkExtraction(i, data, sims, period, groupName, varName)
a <- igraph::evcent(x)$vector
a[is.na(a)] <- Inf
lel <- length(levls)
if (cumulative)
{
cdi <- sapply(2:lel, function(i){sum(a<=levls[i])})
}
else
{
cdi <- sapply(2:lel, function(i){
sum(a<=levls[i]) - sum(a <= levls[i-1])})
}
names(cdi) <- as.character(levls[2:lel])
cdi
}
MoranGeary <- function(i, data, sims, wave, groupName, varName, levls=1:2){
#unloadNamespace("igraph") # to avoid package clashes
require(sna)
require(network)
x <- as.sociomatrix(networkExtraction(i, data, sims, wave, groupName, varName[1]))
z <- behaviorExtraction(i,data,sims,wave,groupName,varName[2])
n <- length(z)
z.ave <- mean(z,na.rm=TRUE)
numerator <- n*sum(x*outer(z-z.ave,z-z.ave),na.rm=TRUE)
denominator <- sum(x,na.rm=TRUE)*sum((z-z.ave)^2,na.rm=TRUE)
res <- numerator/denominator
numerator <- (n-1)*sum(x*(outer(z,z,FUN='-')^2),na.rm=TRUE)
denominator <- 2*sum(x,na.rm=TRUE)*sum((z-z.ave)^2,na.rm=TRUE)
res[2] <- numerator/denominator
names(res) <- c("Moran","Geary")
return(res)
}
Moran123 <- function(i, data, sims, wave, groupName, varName, levls=1){
#unloadNamespace("igraph") # to avoid package clashes
require(sna)
require(network)
x <- as.sociomatrix(networkExtraction(i, data, sims, wave, groupName, varName[1]))
z <- behaviorExtraction(i,data,sims,wave,groupName,varName[2])
# handle missing data [not checked if this makes sense]:
x[is.na(x)] <- 0
z[is.na(z)] <- mean(z,na.rm=TRUE)
res <- nacf(x,z,lag.max=3,typ="moran")[2:4]
names(res) <- c("d=1","d=2","d=3")
return(res)
}
Geary123 <- function(i, data, sims, wave, groupName, varName, levls=1){
#unloadNamespace("igraph") # to avoid package clashes
require(sna)
require(network)
x <- as.sociomatrix(networkExtraction(i, data, sims, wave, groupName, varName[1]))
z <- behaviorExtraction(i,data,sims,wave,groupName,varName[2])
# handle missing data [not checked if this makes sense]:
x[is.na(x)] <- 0
z[is.na(z)] <- mean(z,na.rm=TRUE)
res <- nacf(x,z,lag.max=5,typ="geary")[2:4]
names(res) <- c("d=1","d=2","d=3")
return(res)
}
EgoAlterTable <- function(i, data, sims, wave, groupName, varName, levls=1){
#unloadNamespace("igraph") # to avoid package clashes
#require(sna)
#require(network)
x <- as.sociomatrix(networkExtraction(i, data, sims, wave, groupName, varName[1]))
z <- behaviorExtraction(i,data,sims,wave,groupName,varName[2])
res <- matrix(0,nr=5,nc=5)
for (ego in 1:5) {
for (alt in 1:5) {
thesum <- sum(x[z==ego,z==alt],na.rm=TRUE)
if (thesum>0) {
res[ego,alt] <- thesum
}
}}
thenames <- paste('e',col(res),'a',row(res),sep='')
res <- c(t(res))
names(res) <- thenames
return(res)
}
outTable <- function(x) {
coef <- abs(x$theta)
coefPretty <- sprintf("%.3f", round(coef,3))
se <- diag(x$covtheta)**.5
sePretty <- sprintf("%.3f", round(se,3))
pval <- 2*pnorm(-abs(coef/se))
symp <- symnum(pval, corr = FALSE,
cutpoints = c(0, .001,.01,.05, .1, 1),
symbols = c("***","**","*","."," "))
convPretty <- sprintf("%.3f", round(abs(x$tconv),3))
out1 <- noquote(cbind(
Function = x$effects[[1]],
Effect = x$effects[[2]],
Coef = coefPretty,
StEr = sePretty,
Sig = symp,
Conv = convPretty))
out2 <- paste("Maximum Convergence Ratio:", round(x$tconv.max,3))
return(list(out1,out2))
}
# ======================
# (2) inspect data
# ======================
# we'll use the s50 data, which are part of the RSiena package
?s50
# longitudinal data on networks and alcohol use (recent RSiena also has smoking)
# for more information: http://www.stats.ox.ac.uk/~snijders/siena/s50_data.htm
# alcohol use:
dim(s50a) # 50 students (rows), 3 time points (columns)
head(s50a, 10) # first 10 rows of data
apply(s50a,FUN=table,MARGIN=2,useNA='always') # freq. distribution by wave
# alcohol use is already coded as ordinal - GOOD! (required for dependent behaviors)
# is there sufficient change in alcohol use?
# examine correlations and discrete changes
cor(data.frame(s50a))
table(s50a[,1],s50a[,2],useNA='always')
table(s50a[,2],s50a[,3],useNA='always')
# & total:
( totalChange <- table(s50a[,1],s50a[,3],useNA='always') )
sum(diag(totalChange)) / sum(totalChange) # 50% at same level t1 & t3
# create a fake attribute as an example later
( fakeAttr <- rep(c(0, 1), each=25) )
# networks:
s50list <- list(s501,s502,s503)
lapply(s50list, dim) # each network has 50 actors
lapply(s50list, function(x) table(x, useNA='always')) # 113 to 122 ties, none missing
# converting the matrices to an array makes some things more efficient later
s50array <- array(c(s501,s502,s503),dim=c(dim(s501),3))
dim(s50array) # 3 50x50 adjacency matrices layered atop one another
# network change between subsequent observations:
(tab1to2 <- table(s501,s502,useNA='always') )
# 2328 0's and 57 1's remained stable;
# 59 0's became 1's; 56 1's became 0's
# measure stability using the jaccard index (script only works if no missing ties)
# should be minimum of .2 for RSiena
tab1to2[2,2] / (sum(tab1to2)-tab1to2[1,1]) # jaccard=.33
(tab2to3 <- table(s502,s503,useNA='always') )
tab2to3[2,2] / (sum(tab2to3)-tab2to3[1,1]) # jaccard=.38
# create colors to shade nodes by substance use
color5 <- c('gray95','gray70','gray50','gray30','gray5') # light to dark for drinking
# create and save a layout based on union of 3 networks.
# we will use the same layout to graph the network at each wave.
# rerun the g.layout until it is pleasing.
g.layout <- gplot(apply(s50array, c(1,2), max), usearrows=F)
# plot each network with nodes shaded by accompanying alcohol use
par(mfrow=c(1,3))
for (i in 1:3) {
gplot(s50array[,,i], vertex.col=color5[s50a[,i]], arrowhead.cex=.75, edge.col='gray70', coord=g.layout,
main=paste('Drinking t',i,sep="") )
}
# ================================
# (3) prepare objects for RSiena
# ================================
# networks must be stored as a matrix;
# each matrix is the same size;
# matrices are ultimately layered to form an array
# convert network to "dependent network variable" (RSiena object):
friends <- sienaDependent(s50array)
# behaviors to be modeled as an outcome must be a matrix
# where rows are actors and columns are time points
class(s50a) # alcohol use is already a matrix
# convert behavior to "dependent behavior variable" (RSiena object):
drinking <- sienaDependent(s50a,type="behavior")
# attributes are identified independently
# our fake attribute is a constant covariate
fakeAttrCC <- coCovar(fakeAttr)
# bind data together for Siena analysis:
myDataDrink <- sienaDataCreate(friends,drinking,fakeAttrCC)
myDataDrink
# we could also have no behaviors, just a network
myData <- sienaDataCreate(friends)
myData
# Any dependent objects specified during the binding step will be
# included in the model by default. It is easiest to only list
# networks or behaviors you intend to model.
# write a descriptive summary file (a good check):
# for newer versions of RSiena
#print01Report(myDataDrink,modelname='drink170526')
# older RSiena versions require an additional step
myEff <- getEffects(myDataDrink)
print01Report(myDataDrink,modelname='drink170526',myEff)
# ================================================
# (4) model specification / selection of effects
# ================================================
# create a model specification object for the data:
myEffects <- getEffects(myDataDrink)
myEffects
# note that some effects are included by default
# inspect possible effects:
effectsDocumentation(myEffects)
# Note how column "name" has three names (scroll way down),
# one for each function in the model
# specify the model
# drinking function: effects where drinking is the *outcome*
# Test whether drinking is 'contagious' (i.e., peer influence)
myEffects <- includeEffects(myEffects,avSim,
interaction1="friends",name="drinking")
# instead of 'avSim' could use 'totSim', 'avAlt', or 'totAlt'
# Test whether being lonely leads to drinking
myEffects <- includeEffects(myEffects,isolate,
interaction1="friends",name="drinking")
# instead of 'isolate' could use 'indegree'
# network function: effects where friends is the *outcome* and drinking a predictor
# Test whether drinking helps one make friends
myEffects <- includeEffects(myEffects,egoX,
interaction1="drinking",name="friends")
# Test whether drinking enhances (or reduces) popularity
myEffects <- includeEffects(myEffects,altX,
interaction1="drinking",name="friends")
# Test whether actors select friends based on similar drinking level
# (i.e., selection homophily)
myEffects <- includeEffects(myEffects,simX,
interaction1="drinking",name="friends")
# instead of 'simX' could use 'egoXaltX'
# add structural effects as control variables:
myEffects <- includeEffects(myEffects, recip, transTrip,
name="friends")
# control for selection on the fake (categorical) attrbute
myEffects <- includeEffects(myEffects, egoX, altX, sameX,
interaction1='fakeAttrCC', name="friends")
# control for effect of the fake attrbute on drinking
myEffects <- includeEffects(myEffects, effFrom,
interaction1='fakeAttrCC', name="drinking")
# test whether an interaction between reciprocity and transitivity should be included
myEffects <- setEffect(myEffects, transRecTrip,
include=T, fix=T, test=T)
# see Schweinberger (2012) for more info. on this score test
# full model specification looks like this now:
myEffects
# note that transitive reciprocated triplets is fixed (at 0) and tested for inclusion
# ====================
# (5) model estimation
# ====================
# Create an object with options for algorithm tuning:
# For newer RSiena versions use these options
#modelOptions <- sienaAlgorithmCreate(
# projname='lab170526', MaxDegree=c(friends=6),
# doubleAveraging=0, diagonalize=.2, seed=786840) # the seed is for the lab only
# Older RSiena versions:
modelOptions <- sienaAlgorithmCreate(
projname='lab170526', MaxDegree=c(friends=6),
diagonalize=.2, seed=786840) # the seed is for the lab only
# Estimate the model, you can decrease estimation time by
# using more processors with the options commented out below
myResults <- siena07(modelOptions, data=myDataDrink,
effects= myEffects, batch=FALSE, verbose=TRUE
) # if multiple processors desired, comment out this line and run the next
# , initC=T, useCluster=TRUE, nbrNodes=3) # replace "3" with your number of processors-1
myResults # brief report of results
# If 'Overall maximum convergence ratio' is greater than .25 rerun the model
# and use previous estimates as starting values (prevAns option)
# myAttempt2 <- siena07(modelOptions, data=myDataDrink, effects=myEffects, prevAns=myResults)
# To identify troublesome effects, look for high 'Convergence t-ratios' (>.1)
# Note, there are no p-values; dividing the estimate by its standard error gives
# a t-ratio that is approximatley normally distributed
# (t-ratios > 2 are significant at alpha=.05)
# But we can also create a function to table this with conventional stars
outTable(myResults)
# Examine fuller report of results (shows score tests)
summary(myResults)
# The score test indicates the simulated count of transitive reciprocated triplets is off
# (null hypothesis is that the simulated count = observed count)
# Let's add this effect to our effects object and rerun the model
myEffects <- includeEffects(myEffects, transRecTrip,name="friends")
myResults2 <- siena07(modelOptions, data=myDataDrink,
effects= myEffects, batch=FALSE, verbose=F,
prevAns=myResults, returnDeps=T # specify starting values; return simulated nets (for GOF)
)
myResults2 # examine results; good covergence; transRecTrip is significant
myRes <- myResults2 # for the following steps, assign final estimates to "myRes"
# This allows us to use the same script without changing the results object name each time
# ====================
# (6) goodness of fit
# ====================
# Calculate the fit with respect to the indegree distribution.
# By specifying "verbose=TRUE" we get information on the screen telling us
# how far calculations have progressed.
# (You may see a note about "method with signature CsparseMatrix# (etc.)
# which you can ignore.)
# indegree
( gof.id <- sienaGOF(myRes, verbose=TRUE, varName="friends", IndegreeDistribution,
join=T, cumulative=F) )
plot(gof.id) # looks great!
# outdegree
( gof.od <- sienaGOF(myRes, verbose=TRUE, varName="friends", OutdegreeDistribution,
join=T, cumulative=F) )
plot(gof.od) # good
# More GOF functions (OPTIONAL)
# To run, FIRST create the functions at the end of this file
# WARNING: some of these take a long time (may want to skip during lab)
# geodesic distances
( gof.gd <- sienaGOF(myRes, verbose=TRUE, varName="friends", GeodesicDistribution,
join=T, cumulative=F) )
plot(gof.gd)
# On the low side. There are too many short distances, not enough separation into components.
# triad census
(gof.tc <- sienaGOF(myRes, verbose=TRUE, varName="friends", TriadCensus, join=T) )
plot(gof.tc, scale=TRUE, center=TRUE)
# eigenvector centralities
( gof.ev <- sienaGOF(myRes, verbose=TRUE, varName="friends", EigenvalueDistribution,
join=T, cumulative=F) )
plot(gof.ev)
# goodness of fit overall behaviour distribution
( gof.behaviour <- sienaGOF(myRes,BehaviorDistribution,
verbose=TRUE,join=TRUE,varName="drinking") )
plot(gof.behaviour) # looks very good!
# two spatial autocorrelation coefficients
( gof.MoranGeary <- sienaGOF(myRes,MoranGeary,
verbose=TRUE,join=FALSE,varName=c("friends","drinking")) )
plot(gof.MoranGeary,period=1) # acceptable
plot(gof.MoranGeary,period=2) # acceptable
# Moran's I autocorrelation at a distance:
( gof.Moran <- sienaGOF(myRes,Moran123,
verbose=TRUE,join=TRUE,varName=c("friends","drinking")) )
plot(gof.Moran) # acceptable
# note that coefficients are summed up over periods!
# same for Geary's c
( gof.Geary <- sienaGOF(myRes,Geary123,
verbose=TRUE,join=TRUE,varName=c("friends","drinking")) )
plot(gof.Geary) # acceptable
# note that coefficients are summed up over periods!
# ego-by-alter table for alcohol scores
( gof.EgoAlterTable <- sienaGOF(myRes,EgoAlterTable,
verbose=TRUE,join=TRUE,varName=c("friends","drinking")) )
plot(gof.EgoAlterTable,center=TRUE,scale=TRUE)
# ======================
# (7) time heterogeneity
# ======================
# test whether estimates differ from wave 1 to 2 vs. wave 2 to 3
tt <- sienaTimeTest(myRes)
summary(tt)
# see "Joint significance test", null hypothesis is time homogeneity
# p > .05 indicates no time heterogeneity
# ======================
# (8) Interpretation
# ======================
# Ego-Alter Selection Table
# Interpret the effects of drinking on friend selection
# First define a function that incorporates the relevant part
# of the evaluation function, dependent on the parameters b1, b2, b3,
# the overall average v_av, the similarity average sim_av,
# and the range ran_v
obj_n <- function(vi, vj){
b1*(vi-v_av) + b2*(vj-v_av) + b3*(1 - abs(vi-vj)/ran_v - sim_av)
}
# Now fill in the values of the parameter estimates and the averages (from the descriptive output)
v_av <- 3.113 # average drink
sim_av <- 0.6744 # average similarity
ran_v <- 4 # range of values
b1 <- .0557 # drink ego
b2 <- -.0814 # drink alter
b3 <- 1.3071 # drink similarity
vv <- c(1, 2, 3, 4, 5) # Define the values of v for which the table is to be given
sel_tab <- outer(vv, vv, obj_n) # calculate the table
round(sel_tab,3) # display the table: rows are egos, columns are alters
# cells indicate the contribution to the network function of
# each type of dyad. rows are ego drinking (1-5); columns are alter drinking (1-5)
# plot the predicted contribution to the network function
graphics.off()
levelplot(sel_tab,col.regions=colorRampPalette(c("white","yellow", "green", "blue")),
scales=list(col='white'), main = "Contribution to the Network Function",
xlab = "Ego's Attribute Value", ylab = "Alter's Attribute Value" )
# another plot (more approprite for continuous attributes)
filled.contour(x=vv, y=vv, z=sel_tab, zlim=c(min(sel_tab),max(sel_tab)), nlevels=50, axes=TRUE,
color.palette=colorRampPalette(c("white","yellow", "green", "blue")),
main = "Contribution to the Network Function",
xlab = "Ego Attribute Value", ylab = "Alter Attribute Value" )
# Peer Influence Table
# Define part of evaluation function
# Note: this will differ depending on measure of peer influence used!
obj_b <- function(vi, vj){
b1*(vi-v_av) + b2*(vi-v_av)^2 + b3*(1 - abs(vi-vj)/ran_v - sim_av)
}
# Fill in the values of the parameter estimates and the averages
v_av <- 3.113
sim_av <- 0.6983
b1 <- 0.42 # linear
b2 <- -0.0721 # quad
b3 <- 3.9603 # behavior: average similarity
vv <- c(1, 2, 3, 4, 5)
( sel_tab <- outer(vv, vv, obj_b) ) # calculate the table
# plot the predicted contribution to the behavior function
levelplot(sel_tab,col.regions=colorRampPalette(c("white","yellow", "green", "blue")),
scales=list(col='white'), main = "Contribution to the Behavior Function",
xlab = "Ego's Prospective Attribute Value", ylab = "Alter's Attribute Value" )
# Rows are alters' given value; columns are ego's prospective behavior
# When ego's alters have the behavior in a given row,
# ego is drawn to the column with the highest positive value
# Tangent: how do things look different with avAlt measure of influence?
#myEffects <- setEffect(myEffects,avSim,interaction1="friends",name="drinking", include=F)
#myEffects <- setEffect(myEffects,avAlt,interaction1="friends",name="drinking", include=T)
#myResults3 <- siena07(modelOptions, data=myDataDrink, effects=myEffects)
#obj_b2 <- function(vi, vj){b1*(vi-v_av) + b2*(vi-v_av)^2 + b3*((vi-v_av)*(vj-v_av))}
#v_av <- 3.113
#sim_av <- 0.6983
#b1 <- 1.297 # linear
#b2 <- -0.6397 # quad
#b3 <- 1.4135 # behavior: average alter
#vv <- c(1, 2, 3, 4, 5)
#( sel_tab <- outer(vv, vv, obj_b2) ) # calculate the table
# plot the predicted contribution to the behavior function
#levelplot(sel_tab,col.regions=colorRampPalette(c("white","yellow", "green", "blue")),
# scales=list(col='white'), main = "Contribution to the Behavior Function",
# xlab = "Ego's Prospective Attribute Value", ylab = "Alter's Attribute Value" )
# ======================
# (9) Simulations
# ======================
# Simulations to evaluate an intervention or manipulating model parameters
# Let's adjust the strength of peer influence
# reduce data to only two time points (1 & 3)
friends2 <- sienaDependent(s50array[,,c(1,3)])
drinking2 <- sienaDependent(s50a[,c(1,3)],type="behavior")
myDataSim <- sienaDataCreate(friends2,drinking2,fakeAttrCC)
myDataSim
simEffects <- getEffects(myDataSim)
simEffects <- includeEffects(simEffects,avSim,interaction1="friends2",name="drinking2")
simEffects <- includeEffects(simEffects,isolate,interaction1="friends2",name="drinking2")
simEffects <- includeEffects(simEffects,egoX,interaction1="drinking2",name="friends2")
simEffects <- includeEffects(simEffects,altX,interaction1="drinking2",name="friends2")
simEffects <- includeEffects(simEffects,simX,interaction1="drinking2",name="friends2")
simEffects <- includeEffects(simEffects, recip, transTrip,name="friends2")
simEffects <- includeEffects(simEffects, egoX, altX, sameX,interaction1='fakeAttrCC', name="friends2")
simEffects <- includeEffects(simEffects, effFrom,interaction1='fakeAttrCC', name="drinking2")
simEffects <- includeEffects(simEffects, transRecTrip,name="friends2")
# fit model with new two-wave data specification
modelOptionsSim1 <- sienaAlgorithmCreate(projname='lab170526sim',
nsub=5, MaxDegree=c(friends2=6),seed=786840) # the seed is for the lab only
myResultsObs <- siena07(modelOptionsSim1, data=myDataSim,
effects=simEffects, returnDeps=T)
myResultsObs
# set initial values for simulations based on estimated model
simEffects$initialValue[simEffects$include==T] <- myResultsObs$theta
simEffects$fix[simEffects$include==T] <- TRUE
simEffects
# manipulate strength of peer influence to be much larger
simEffectsHiPI <- simEffects
simEffectsHiPI <- setEffect(simEffectsHiPI, avSim, interaction1='friends2', name='drinking2',
initialValue=12, fix=T)
simEffectsHiPI
# make peer influence much lower
simEffectsLoPI <- simEffects
simEffectsLoPI <- setEffect(simEffectsLoPI, avSim, interaction1='friends2', name='drinking2',
initialValue=.5, fix=T)
simEffectsLoPI
nIter <- 50
modelOptionsSim2 <- sienaAlgorithmCreate(projname='lab170526sim', MaxDegree=c(friends2=6),
nsub=0, n3=nIter, simOnly=T, seed=4) # the seed is for the lab only
myResultsSimHiPI <- siena07(modelOptionsSim2, data=myDataSim,
effects=simEffectsHiPI, returnDeps=T, returnChains=T)
myResultsSimLoPI <- siena07(modelOptionsSim2, data=myDataSim,
effects=simEffectsLoPI, returnDeps=T, returnChains=T)
# extract mean drinking from simulation runs
# vectors to store means
meanSimO <- rep(0,1000) # observed level of peer influence
meanSimHiPI <- rep(0,nIter) # high peer influence
meanSimLoPI <- rep(0,nIter) # low peer influence
# simulations using estimated parameters
for (m in 1:1000) {
meanSimO[m] <- mean( myResultsObs$sims[[m]][[1]]$drinking[[1]] )
}
# simulations that manipulate peer influence
for (m in 1:nIter) {
meanSimHiPI[m] <- mean( myResultsSimHiPI$sims[[m]][[1]]$drinking2[[1]] )
meanSimLoPI[m] <- mean( myResultsSimLoPI$sims[[m]][[1]]$drinking2[[1]] )
}
meanSimHiPI
meanSimLoPI
# store observed mean drinking at time 3
meanObs <- mean(s50a[,3])
toPlot <- rbind(
data.frame(cond="Observed",mean=meanSimO),
data.frame(cond="High",mean=meanSimHiPI),
data.frame(cond="Low",mean=meanSimLoPI) )
boxplot(mean ~ cond, data=toPlot, main="Mean Drinking", xlab="Value of Peer Influence",
ylab="Mean Drinking")
abline(h=meanObs)
# examine simulated drinking change in greater detail
InitSim <- myResultsSimHiPI
nstudents <- dim(s50a)[1]
Zs5 <- array(NA, dim=c(nstudents,1))
Zb5 <- array (0, dim=c(nIter,1))
for (m in 1: nIter) {
for (n in 1:nstudents) {
Zs5[n,1] <- InitSim$sims[[m]][[1]]$drinking2[[1]][[n]]
}
Zb5[m] <-colSums(Zs5, na.rm=T)/nstudents
}
Zb5
#average drinkers in simulation
colMeans(s50a, na.rm=T) # observed t1: 2.88, t2: 3.1, t3: 3.36
colMeans(Zb5) # average sim b is 3.2
mean( InitSim$sims[[50]][[1]]$drinking2[[1]] ) # mean of 50th simulation run: 3.02
# plot mean simulated drinking over time
chainNum <- 50
datChain <- t(matrix(unlist(InitSim$chain[[chainNum]][[1]][[1]]), nc=length(InitSim$chain[[chainNum]][[1]][[1]])))
datBehavChain <- datChain[datChain[,2]=="1",] # condense to behavior change opportunities
( nBehavChanges <- dim(datBehavChain)[1] ) # number of behavior change opportunities
meanSeqB <- nBehavChanges + 1 # only record behavior changes
meanSeqB[1] <- mean(s50a[,1]) # set t1 drinking mean to observed level
for (i in 1:nBehavChanges) {
meanSeqB[i+1] <- meanSeqB[i] + (as.numeric(datBehavChain[i,6])/50)
}
plot(x=1:length(meanSeqB), y=meanSeqB, type="l",
ylab='drinking mean', xlab='time',
main='mean drinking over time')
# plot mean simulated drinking over time, with NETWORK change opportunities included (a better approach)
chainNum <- 50
datChain <- t(matrix(unlist(InitSim$chain[[chainNum]][[1]][[1]]), nc=length(InitSim$chain[[chainNum]][[1]][[1]])))
( nChanges <- dim(datChain)[1] )
meanSeqF <- nChanges + 1 # full sequence, whether behavior changes or not
meanSeqF[1] <- mean(s50a[,1]) # set t1 drinking mean to observed level
for (i in 1:nChanges) {
if (datChain[i,2]=="0") {
meanSeqF[i+1] <- meanSeqF[i]
}
if (datChain[i,2]=="1") {
meanSeqF[i+1] <- meanSeqF[i] + (as.numeric(datChain[i,6])/50)
}
}
plot(x=1:length(meanSeqF), y=meanSeqF, type="l",
ylab='drinking mean', xlab='micro step',
main='mean drinking over time')
# extract mean drinking for all 50 chains, with network change opportunities included
simChanges <- rep(0, nIter)
for (i in 1: nIter) {simChanges[i] <- length(InitSim$chain[[i]][[1]][[1]]) }
maxChanges <- max(simChanges)
seqs <- matrix(Zb5, nr= nIter, nc=maxChanges+1) # fill matrix with final mean
seqs[,1] <- mean(s50a[,1]) # set t1 drinking mean to observed level
for (i in 1: nIter) {
datChain <- t(matrix(unlist(InitSim$chain[[i]][[1]][[1]]), nc=length(InitSim$chain[[i]][[1]][[1]])))
for (j in 1: simChanges[i]) {
if (datChain[j,2]=="0") {
seqs[i,j+1] <- seqs[i,j]
}
if (datChain[j,2]=="1") {
seqs[i,j+1] <- seqs[i,j] + (as.numeric(datChain[j,6])/50)
}
}
}
# plot the first 25 iterations
plot(x=1:dim(seqs)[2], y=seqs[1,], ylim=c(min(seqs),max(seqs)),type="l",
ylab='drinking mean', xlab='micro step',
main='mean drinking over time')
for (i in 1:25) { lines(x=1:dim(seqs)[2], y=seqs[i,], col=colors()[i*10]) }
# not very pretty, so let's try another approach
# plot one iteration using loess curve
micros <- 1:dim(seqs)[2]
lo1 <- loess(seqs[1,] ~ micros)
l1 <- predict(lo1, micros)
plot(x=1:dim(seqs)[2], y=seqs[1,], ylim=c(min(seqs),max(seqs)),type="l",
ylab='drinking mean', xlab='micro step',
main='mean drinking over time')
lines(x=micros, y=l1, col='blue')
# plot multiple iterations using loess curves
plot(x=1:dim(seqs)[2], y=seqs[1,], ylim=c(min(seqs),max(seqs)),type="l",
ylab='drinking mean', xlab='micro step', col='white',
main='mean drinking over time')
for (i in 1:25) {
lines(x=micros, y=predict(loess(seqs[i,] ~ micros), micros), col=colors()[i*10])
}