# SIENA tutorial using R for the Social Networks and Health
# workshop at Duke University on May 26, 2017
# Adapted from tutorial by Christian Steglich
# http://www.gmw.rug.nl/~steglich/workshops/Groningen2015.htm
##############################################
# Contents:
# (1) preparation
# (2) inspect data
# (3) preparing objects for RSiena
# (4) model specification
# (5) model estimation
# (6) goodness of fit
# (7) time heterogeneity
# (8) interpretation
# (9) simulations
##############################################

# ==========================
# (1) preparatory steps
# ==========================

# install the RSiena package
install.packages("RSiena")
# this is an older version but it is simple to download
# the full version is here, but it is incompatible with R 3.4
# and requires compilation for macs. But if you insist, un-hash and run
# install.packages("RSiena", repos="http://R-Forge.R-project.org")
# answer "y" to the question of whether you want to install from source
# optionally, install the RSienaTest package
# install.packages("RSienaTest", repos="http://R-Forge.R-project.org")

# load RSiena commands and sna to the R namespace
library(RSiena)
library(sna)
# install.packages("lattice")
library(lattice) # for plotting

# check your version
# latest version 1.1-304
packageVersion("RSiena")

# set working directory to where you want to store output files:
#setwd("/Users/davidschaefer/Dropbox (ASU)/ASU/Teaching/Duke/lab/output/")

# Create a series of functions that will help interpret model performance & output

# Auxiliary network extractor for sienaGOF, returning an igraph object.
# When i is NULL, returns the OBSERVED network at wave period+1 (with
# missing ties set to 0); otherwise returns the i-th SIMULATED network
# for that period, with ties missing at either endpoint of the period
# removed via graph.difference().
igraphNetworkExtraction <- function(i, data, sims, period, groupName, varName){
  require(igraph)
  # number of nodes in the dependent network ("netdims" attribute set by RSiena)
  dimsOfDepVar<- attr(data[[groupName]]$depvars[[varName]], "netdims")
  # a tie counts as missing if it is NA at the start OR end of the period
  missings <- is.na(data[[groupName]]$depvars[[varName]][,,period]) |
    is.na(data[[groupName]]$depvars[[varName]][,,period+1])
  if (is.null(i)) {
    # sienaGOF wants the observation:
    original <- data[[groupName]]$depvars[[varName]][,,period+1]
    original[missings] <- 0
    returnValue <- graph.adjacency(original)
  }
  else {
    missings <- graph.adjacency(missings)
    # sienaGOF wants the i-th simulation:
    # rebuild the simulated graph from its (sender, receiver) edge list,
    # then subtract the missing-tie graph
    returnValue <- graph.difference(
      graph.empty(dimsOfDepVar) +
        edges(t(sims[[i]][[groupName]][[varName]][[period]][,1:2])),
      missings)
  }
  returnValue
}

# Auxiliary statistic for sienaGOF: distribution of geodesic distances
# over the SYMMETRIZED network, counted at the cut points in `levls`
# (cumulative counts by default).
# NOTE: relies on networkExtraction(), which is supplied by RSiena.
GeodesicDistribution <- function (i, data, sims, period, groupName, varName,
                                  levls=c(1:5,Inf), cumulative=TRUE, ...) {
  x <- networkExtraction(i, data, sims, period, groupName, varName)
  require(sna)
  a <- sna::geodist(symmetrize(x))$gdist
  if (cumulative) {
    gdi <- sapply(levls, function(i){ sum(a<=i) })
  }
  else {
    gdi <- sapply(levls, function(i){ sum(a==i) })
  }
  names(gdi) <- as.character(levls)
  gdi
}

# Holland and Leinhardt Triad Census; see ?sna::triad.census.
# Auxiliary statistic for sienaGOF: counts of the 16 directed triad types
# (or the subset selected by `levls`).
TriadCensus <- function(i, data, sims, wave, groupName, varName, levls=1:16){
  unloadNamespace("igraph") # to avoid package clashes
  require(sna)
  require(network)
  x <- networkExtraction(i, data, sims, wave, groupName, varName)
  if (network.edgecount(x) <= 0){x <- symmetrize(x)}
  # because else triad.census(x) will lead to an error
  tc <- sna::triad.census(x)[1,levls]
  # names are transferred automatically
  tc
}

# Distribution of Bonacich eigenvalue centrality; see ?igraph::evcent.
# Auxiliary statistic for sienaGOF: distribution of eigenvector centrality
# scores, binned at the cut points in `levls` (cumulative by default).
# Isolates get NA centrality, which is mapped to Inf so they fall outside
# every finite bin.
EigenvalueDistribution <- function (i, data, sims, period, groupName, varName,
                                    levls=c(seq(0,1,by=0.125)), cumulative=TRUE){
  require(igraph)
  x <- igraphNetworkExtraction(i, data, sims, period, groupName, varName)
  a <- igraph::evcent(x)$vector
  a[is.na(a)] <- Inf
  lel <- length(levls)
  if (cumulative) {
    cdi <- sapply(2:lel, function(i){ sum(a<=levls[i]) })
  } else {
    cdi <- sapply(2:lel, function(i){ sum(a<=levls[i]) - sum(a <= levls[i-1]) })
  }
  names(cdi) <- as.character(levls[2:lel])
  cdi
}

# Auxiliary statistic for sienaGOF: Moran's I and Geary's c autocorrelation
# of behavior varName[2] over network varName[1] (joint network-behavior fit).
MoranGeary <- function(i, data, sims, wave, groupName, varName, levls=1:2){
  #unloadNamespace("igraph") # to avoid package clashes
  require(sna)
  require(network)
  x <- as.sociomatrix(networkExtraction(i, data, sims, wave, groupName, varName[1]))
  z <- behaviorExtraction(i, data, sims, wave, groupName, varName[2])
  n <- length(z)
  z.ave <- mean(z, na.rm=TRUE)
  # Moran's I
  numerator <- n*sum(x*outer(z-z.ave, z-z.ave), na.rm=TRUE)
  denominator <- sum(x, na.rm=TRUE)*sum((z-z.ave)^2, na.rm=TRUE)
  res <- numerator/denominator
  # Geary's c
  numerator <- (n-1)*sum(x*(outer(z, z, FUN='-')^2), na.rm=TRUE)
  denominator <- 2*sum(x, na.rm=TRUE)*sum((z-z.ave)^2, na.rm=TRUE)
  res[2] <- numerator/denominator
  names(res) <- c("Moran","Geary")
  return(res)
}

# Auxiliary statistic for sienaGOF: Moran's I autocorrelation at network
# distances 1 through 3 (via sna::nacf).
Moran123 <- function(i, data, sims, wave, groupName, varName, levls=1){
  #unloadNamespace("igraph") # to avoid package clashes
  require(sna)
  require(network)
  x <- as.sociomatrix(networkExtraction(i, data, sims, wave, groupName, varName[1]))
  z <- behaviorExtraction(i, data, sims, wave, groupName, varName[2])
  # handle missing data [not checked if this makes sense]:
  x[is.na(x)] <- 0
  z[is.na(z)] <- mean(z, na.rm=TRUE)
  res <- nacf(x, z, lag.max=3, type="moran")[2:4]  # drop lag 0
  names(res) <- c("d=1","d=2","d=3")
  return(res)
}

# Auxiliary statistic for sienaGOF: Geary's c autocorrelation at network
# distances 1 through 3 (lags 4-5 are computed but discarded).
Geary123 <- function(i, data, sims, wave, groupName, varName, levls=1){
  #unloadNamespace("igraph") # to avoid package clashes
  require(sna)
  require(network)
  x <- as.sociomatrix(networkExtraction(i, data, sims, wave, groupName, varName[1]))
  z <- behaviorExtraction(i, data, sims, wave, groupName, varName[2])
  # handle missing data [not checked if this makes sense]:
  x[is.na(x)] <- 0
  z[is.na(z)] <- mean(z, na.rm=TRUE)
  res <- nacf(x, z, lag.max=5, type="geary")[2:4]  # drop lag 0; keep d=1..3
  names(res) <- c("d=1","d=2","d=3")
  return(res)
}

# Auxiliary statistic for sienaGOF: 5x5 table of tie counts cross-classified
# by ego's and alter's behavior score (assumes scores 1-5). Returned as a
# named vector in row-major order: e1a1, e1a2, ..., e5a5.
EgoAlterTable <- function(i, data, sims, wave, groupName, varName, levls=1){
  #unloadNamespace("igraph") # to avoid package clashes
  #require(sna)
  #require(network)
  x <- as.sociomatrix(networkExtraction(i, data, sims, wave, groupName, varName[1]))
  z <- behaviorExtraction(i, data, sims, wave, groupName, varName[2])
  res <- matrix(0, nrow=5, ncol=5)
  for (ego in 1:5) {
    for (alt in 1:5) {
      thesum <- sum(x[z==ego, z==alt], na.rm=TRUE)
      if (thesum > 0) { res[ego,alt] <- thesum }
    }
  }
  # col(res)/row(res) flattened column-major pair with c(t(res)) flattened
  # row-major, so name "e<i>a<j>" labels res[i,j]
  thenames <- paste('e', col(res), 'a', row(res), sep='')
  res <- c(t(res))
  names(res) <- thenames
  return(res)
}

# Pretty-print a sienaFit object: one row per effect with coefficient,
# standard error, conventional significance stars (from an approximate
# normal test), and the convergence t-ratio; plus the overall maximum
# convergence ratio.
outTable <- function(x) {
  # FIX: keep the sign of the estimates. The original used abs(x$theta),
  # which silently displayed negative coefficients as positive.
  coef <- x$theta
  coefPretty <- sprintf("%.3f", round(coef,3))
  se <- diag(x$covtheta)^0.5
  sePretty <- sprintf("%.3f", round(se,3))
  # two-sided p-value from the normal approximation of the t-ratio
  pval <- 2*pnorm(-abs(coef/se))
  symp <- symnum(pval, corr = FALSE,
                 cutpoints = c(0, .001, .01, .05, .1, 1),
                 symbols = c("***","**","*","."," "))
  convPretty <- sprintf("%.3f", round(abs(x$tconv),3))
  out1 <- noquote(cbind(
    Function = x$effects[[1]],
    Effect = x$effects[[2]],
    Coef = coefPretty,
    StEr = sePretty,
    Sig = symp,
    Conv = convPretty))
  out2 <- paste("Maximum Convergence Ratio:", round(x$tconv.max,3))
  return(list(out1,out2))
}

# ======================
# (2) inspect data
# ======================

# we'll use the s50 data, which are part of the RSiena package
?s50
# longitudinal data on networks and alcohol use (recent RSiena also has smoking)
# for more information: http://www.stats.ox.ac.uk/~snijders/siena/s50_data.htm

# alcohol use:
dim(s50a) # 50 students (rows), 3 time points (columns)
head(s50a, 10) # first 10 rows of data
apply(s50a, FUN=table, MARGIN=2, useNA='always') # freq. distribution by wave
# alcohol use is already coded as ordinal - GOOD! (required for dependent behaviors)

# is there sufficient change in alcohol use?
# examine correlations and discrete changes
cor(data.frame(s50a))
table(s50a[,1], s50a[,2], useNA='always')
table(s50a[,2], s50a[,3], useNA='always')
# & total:
( totalChange <- table(s50a[,1], s50a[,3], useNA='always') )
sum(diag(totalChange)) / sum(totalChange) # 50% at same level t1 & t3

# create a fake attribute as an example later
# (first 25 actors get 0, last 25 get 1)
( fakeAttr <- rep(c(0, 1), each=25) )

# networks:
s50list <- list(s501, s502, s503)
lapply(s50list, dim) # each network has 50 actors
lapply(s50list, function(x) table(x, useNA='always')) # 113 to 122 ties, none missing

# converting the matrices to an array makes some things more efficient later
s50array <- array(c(s501, s502, s503), dim=c(dim(s501), 3))
dim(s50array) # 3 50x50 adjacency matrices layered atop one another

# network change between subsequent observations:
(tab1to2 <- table(s501, s502, useNA='always') )
# 2328 0's and 57 1's remained stable;
# 59 0's became 1's; 56 1's became 0's
# measure stability using the jaccard index (script only works if no missing ties)
# should be minimum of .2 for RSiena
# jaccard = |1->1| / (|0->1| + |1->0| + |1->1|)
tab1to2[2,2] / (sum(tab1to2)-tab1to2[1,1]) # jaccard=.33
(tab2to3 <- table(s502, s503, useNA='always') )
tab2to3[2,2] / (sum(tab2to3)-tab2to3[1,1]) # jaccard=.38

# create colors to shade nodes by substance use
color5 <- c('gray95','gray70','gray50','gray30','gray5') # light to dark for drinking

# create and save a layout based on union of 3 networks
# (elementwise max over waves = a tie present in any wave).
# we will use the same layout to graph the network at each wave.
# rerun the g.layout until it is pleasing.
g.layout <- gplot(apply(s50array, c(1,2), max), usearrows=F)

# plot each network with nodes shaded by accompanying alcohol use
par(mfrow=c(1,3))
for (i in 1:3) {
  gplot(s50array[,,i], vertex.col=color5[s50a[,i]], arrowhead.cex=.75,
        edge.col='gray70', coord=g.layout,
        main=paste('Drinking t', i, sep="") )
}

# ================================
# (3) prepare objects for RSiena
# ================================

# networks must be stored as a matrix;
# each matrix is the same size;
# matrices are ultimately layered to form an array

# convert network to "dependent network variable" (RSiena object):
friends <- sienaDependent(s50array)

# behaviors to be modeled as an outcome must be a matrix
# where rows are actors and columns are time points
class(s50a) # alcohol use is already a matrix
# convert behavior to "dependent behavior variable" (RSiena object):
drinking <- sienaDependent(s50a, type="behavior")

# attributes are identified independently
# our fake attribute is a constant covariate
fakeAttrCC <- coCovar(fakeAttr)

# bind data together for Siena analysis:
myDataDrink <- sienaDataCreate(friends, drinking, fakeAttrCC)
myDataDrink

# we could also have no behaviors, just a network
myData <- sienaDataCreate(friends)
myData
# Any dependent objects specified during the binding step will be
# included in the model by default. It is easiest to only list
# networks or behaviors you intend to model.
# write a descriptive summary file (a good check):
# for newer versions of RSiena
#print01Report(myDataDrink,modelname='drink170526')
# older RSiena versions require an additional step
myEff <- getEffects(myDataDrink)
print01Report(myDataDrink, modelname='drink170526', myEff)

# ================================================
# (4) model specification / selection of effects
# ================================================

# create a model specification object for the data:
myEffects <- getEffects(myDataDrink)
myEffects # note that some effects are included by default

# inspect possible effects:
effectsDocumentation(myEffects)
# Note how column "name" has three names (scroll way down),
# one for each function in the model

# specify the model

# drinking function: effects where drinking is the *outcome*
# Test whether drinking is 'contagious' (i.e., peer influence)
myEffects <- includeEffects(myEffects, avSim,
                            interaction1="friends", name="drinking")
# instead of 'avSim' could use 'totSim', 'avAlt', or 'totAlt'
# Test whether being lonely leads to drinking
myEffects <- includeEffects(myEffects, isolate,
                            interaction1="friends", name="drinking")
# instead of 'isolate' could use 'indegree'

# network function: effects where friends is the *outcome* and drinking a predictor
# Test whether drinking helps one make friends
myEffects <- includeEffects(myEffects, egoX,
                            interaction1="drinking", name="friends")
# Test whether drinking enhances (or reduces) popularity
myEffects <- includeEffects(myEffects, altX,
                            interaction1="drinking", name="friends")
# Test whether actors select friends based on similar drinking level
# (i.e., selection homophily)
myEffects <- includeEffects(myEffects, simX,
                            interaction1="drinking", name="friends")
# instead of 'simX' could use 'egoXaltX'

# add structural effects as control variables:
myEffects <- includeEffects(myEffects, recip, transTrip, name="friends")

# control for selection on the fake (categorical) attribute
myEffects <- includeEffects(myEffects,
                            egoX, altX, sameX,
                            interaction1='fakeAttrCC', name="friends")
# control for effect of the fake attribute on drinking
myEffects <- includeEffects(myEffects, effFrom,
                            interaction1='fakeAttrCC', name="drinking")

# test whether an interaction between reciprocity and transitivity should be included
# (fixed at 0 and score-tested rather than estimated)
myEffects <- setEffect(myEffects, transRecTrip, include=T, fix=T, test=T)
# see Schweinberger (2012) for more info. on this score test

# full model specification looks like this now:
myEffects
# note that transitive reciprocated triplets is fixed (at 0) and tested for inclusion

# ====================
# (5) model estimation
# ====================

# Create an object with options for algorithm tuning:
# For newer RSiena versions use these options
#modelOptions <- sienaAlgorithmCreate(
#   projname='lab170526', MaxDegree=c(friends=6),
#   doubleAveraging=0, diagonalize=.2, seed=786840) # the seed is for the lab only
# Older RSiena versions:
modelOptions <- sienaAlgorithmCreate(
  projname='lab170526', MaxDegree=c(friends=6),
  diagonalize=.2, seed=786840) # the seed is for the lab only

# Estimate the model, you can decrease estimation time by
# using more processors with the options commented out below
myResults <- siena07(modelOptions, data=myDataDrink, effects= myEffects,
                     batch=FALSE, verbose=TRUE )
# if multiple processors desired, comment out this line and run the next
# , initC=T, useCluster=TRUE, nbrNodes=3)
# replace "3" with your number of processors-1

myResults # brief report of results
# If 'Overall maximum convergence ratio' is greater than .25 rerun the model
# and use previous estimates as starting values (prevAns option)
# myAttempt2 <- siena07(modelOptions, data=myDataDrink, effects=myEffects, prevAns=myResults)
# To identify troublesome effects, look for high 'Convergence t-ratios' (>.1)

# Note, there are no p-values; dividing the estimate by its standard error gives
# a t-ratio that is approximately normally distributed
# (t-ratios > 2 are significant at alpha=.05)
# But we can also create a function to table this with conventional stars
outTable(myResults)

# Examine fuller report of results (shows score tests)
summary(myResults)
# The score test indicates the simulated count of transitive reciprocated triplets is off
# (null hypothesis is that the simulated count = observed count)

# Let's add this effect to our effects object and rerun the model
myEffects <- includeEffects(myEffects, transRecTrip, name="friends")
myResults2 <- siena07(modelOptions, data=myDataDrink, effects= myEffects,
                      batch=FALSE, verbose=F,
                      prevAns=myResults, returnDeps=T
                      # specify starting values; return simulated nets (for GOF)
)
myResults2 # examine results; good convergence; transRecTrip is significant

myRes <- myResults2 # for the following steps, assign final estimates to "myRes"
# This allows us to use the same script without changing the results object name each time

# ====================
# (6) goodness of fit
# ====================

# Calculate the fit with respect to the indegree distribution.
# By specifying "verbose=TRUE" we get information on the screen telling us
# how far calculations have progressed.
# (You may see a note about "method with signature 'CsparseMatrix'" (etc.)
# which you can ignore.)

# indegree
( gof.id <- sienaGOF(myRes, verbose=TRUE, varName="friends",
                     IndegreeDistribution, join=T, cumulative=F) )
plot(gof.id) # looks great!

# outdegree
( gof.od <- sienaGOF(myRes, verbose=TRUE, varName="friends",
                     OutdegreeDistribution, join=T, cumulative=F) )
plot(gof.od) # good

# More GOF functions (OPTIONAL)
# To run, FIRST create the functions at the end of this file
# WARNING: some of these take a long time (may want to skip during lab)

# geodesic distances
( gof.gd <- sienaGOF(myRes, verbose=TRUE, varName="friends",
                     GeodesicDistribution, join=T, cumulative=F) )
plot(gof.gd)
# On the low side. There are too many short distances, not enough separation into components.
# triad census
(gof.tc <- sienaGOF(myRes, verbose=TRUE, varName="friends",
                    TriadCensus, join=T) )
plot(gof.tc, scale=TRUE, center=TRUE)

# eigenvector centralities
( gof.ev <- sienaGOF(myRes, verbose=TRUE, varName="friends",
                     EigenvalueDistribution, join=T, cumulative=F) )
plot(gof.ev)

# goodness of fit overall behaviour distribution
( gof.behaviour <- sienaGOF(myRes, BehaviorDistribution,
                            verbose=TRUE, join=TRUE, varName="drinking") )
plot(gof.behaviour) # looks very good!

# two spatial autocorrelation coefficients
( gof.MoranGeary <- sienaGOF(myRes, MoranGeary,
                             verbose=TRUE, join=FALSE, varName=c("friends","drinking")) )
plot(gof.MoranGeary, period=1) # acceptable
plot(gof.MoranGeary, period=2) # acceptable

# Moran's I autocorrelation at a distance:
( gof.Moran <- sienaGOF(myRes, Moran123,
                        verbose=TRUE, join=TRUE, varName=c("friends","drinking")) )
plot(gof.Moran) # acceptable
# note that coefficients are summed up over periods!

# same for Geary's c
( gof.Geary <- sienaGOF(myRes, Geary123,
                        verbose=TRUE, join=TRUE, varName=c("friends","drinking")) )
plot(gof.Geary) # acceptable
# note that coefficients are summed up over periods!

# ego-by-alter table for alcohol scores
( gof.EgoAlterTable <- sienaGOF(myRes, EgoAlterTable,
                                verbose=TRUE, join=TRUE, varName=c("friends","drinking")) )
plot(gof.EgoAlterTable, center=TRUE, scale=TRUE)

# ======================
# (7) time heterogeneity
# ======================

# test whether estimates differ from wave 1 to 2 vs. wave 2 to 3
tt <- sienaTimeTest(myRes)
summary(tt)
# see "Joint significance test", null hypothesis is time homogeneity
# p > .05 indicates no time heterogeneity

# ======================
# (8) Interpretation
# ======================

# Ego-Alter Selection Table
# Interpret the effects of drinking on friend selection
# First define a function that incorporates the relevant part
# of the evaluation function, dependent on the parameters b1, b2, b3,
# the overall average v_av, the similarity average sim_av,
# and the range ran_v
# (vi = ego's value, vj = alter's value; b1/b2/b3 etc. are read from
# the global environment when the function is called)
obj_n <- function(vi, vj){
  b1*(vi-v_av) + b2*(vj-v_av) + b3*(1 - abs(vi-vj)/ran_v - sim_av)
}

# Now fill in the values of the parameter estimates and the averages (from the descriptive output)
v_av <- 3.113    # average drink
sim_av <- 0.6744 # average similarity
ran_v <- 4       # range of values
b1 <- .0557      # drink ego
b2 <- -.0814     # drink alter
b3 <- 1.3071     # drink similarity
vv <- c(1, 2, 3, 4, 5) # Define the values of v for which the table is to be given
sel_tab <- outer(vv, vv, obj_n) # calculate the table
round(sel_tab,3) # display the table: rows are egos, columns are alters
# cells indicate the contribution to the network function of
# each type of dyad. rows are ego drinking (1-5); columns are alter drinking (1-5)

# plot the predicted contribution to the network function
graphics.off()
levelplot(sel_tab,
          col.regions=colorRampPalette(c("white","yellow", "green", "blue")),
          scales=list(col='white'),
          main = "Contribution to the Network Function",
          xlab = "Ego's Attribute Value", ylab = "Alter's Attribute Value" )

# another plot (more appropriate for continuous attributes)
filled.contour(x=vv, y=vv, z=sel_tab,
               zlim=c(min(sel_tab),max(sel_tab)), nlevels=50, axes=TRUE,
               color.palette=colorRampPalette(c("white","yellow", "green", "blue")),
               main = "Contribution to the Network Function",
               xlab = "Ego Attribute Value", ylab = "Alter Attribute Value" )

# Peer Influence Table
# Define part of evaluation function
# Note: this will differ depending on measure of peer influence used!
# (vi = ego's own/prospective behavior value, vj = alter's value)
obj_b <- function(vi, vj){
  b1*(vi-v_av) + b2*(vi-v_av)^2 + b3*(1 - abs(vi-vj)/ran_v - sim_av)
}

# Fill in the values of the parameter estimates and the averages
v_av <- 3.113
sim_av <- 0.6983
b1 <- 0.42     # linear
b2 <- -0.0721  # quad
b3 <- 3.9603   # behavior: average similarity
vv <- c(1, 2, 3, 4, 5)
( sel_tab <- outer(vv, vv, obj_b) ) # calculate the table

# plot the predicted contribution to the behavior function
levelplot(sel_tab,
          col.regions=colorRampPalette(c("white","yellow", "green", "blue")),
          scales=list(col='white'),
          main = "Contribution to the Behavior Function",
          xlab = "Ego's Prospective Attribute Value", ylab = "Alter's Attribute Value" )
# NOTE(review): corrected from the original comment, which had rows and
# columns reversed. In sel_tab, rows are ego's prospective behavior (vi)
# and columns are the alters' given value (vj), matching the axis labels.
# When ego's alters have the behavior in a given column,
# ego is drawn to the row with the highest positive value
# Tangent: how do things look different with avAlt measure of influence?
#myEffects <- setEffect(myEffects,avSim,interaction1="friends",name="drinking", include=F)
#myEffects <- setEffect(myEffects,avAlt,interaction1="friends",name="drinking", include=T)
#myResults3 <- siena07(modelOptions, data=myDataDrink, effects=myEffects)
#obj_b2 <- function(vi, vj){b1*(vi-v_av) + b2*(vi-v_av)^2 + b3*((vi-v_av)*(vj-v_av))}
#v_av <- 3.113
#sim_av <- 0.6983
#b1 <- 1.297 # linear
#b2 <- -0.6397 # quad
#b3 <- 1.4135 # behavior: average alter
#vv <- c(1, 2, 3, 4, 5)
#( sel_tab <- outer(vv, vv, obj_b2) ) # calculate the table
# plot the predicted contribution to the behavior function
#levelplot(sel_tab,col.regions=colorRampPalette(c("white","yellow", "green", "blue")),
#          scales=list(col='white'), main = "Contribution to the Behavior Function",
#          xlab = "Ego's Prospective Attribute Value", ylab = "Alter's Attribute Value" )

# ======================
# (9) Simulations
# ======================
# Simulations to evaluate an intervention or manipulating model parameters
# Let's adjust the strength of peer influence

# reduce data to only two time points (1 & 3)
friends2 <- sienaDependent(s50array[,,c(1,3)])
drinking2 <- sienaDependent(s50a[,c(1,3)], type="behavior")
myDataSim <- sienaDataCreate(friends2, drinking2, fakeAttrCC)
myDataSim

# same specification as the final model above, on the two-wave data
simEffects <- getEffects(myDataSim)
simEffects <- includeEffects(simEffects, avSim, interaction1="friends2", name="drinking2")
simEffects <- includeEffects(simEffects, isolate, interaction1="friends2", name="drinking2")
simEffects <- includeEffects(simEffects, egoX, interaction1="drinking2", name="friends2")
simEffects <- includeEffects(simEffects, altX, interaction1="drinking2", name="friends2")
simEffects <- includeEffects(simEffects, simX, interaction1="drinking2", name="friends2")
simEffects <- includeEffects(simEffects, recip, transTrip, name="friends2")
simEffects <- includeEffects(simEffects, egoX, altX, sameX,
                             interaction1='fakeAttrCC', name="friends2")
simEffects <- includeEffects(simEffects, effFrom,
                             interaction1='fakeAttrCC', name="drinking2")
simEffects <- includeEffects(simEffects, transRecTrip, name="friends2")

# fit model with new two-wave data specification
modelOptionsSim1 <- sienaAlgorithmCreate(projname='lab170526sim', nsub=5,
                                         MaxDegree=c(friends2=6), seed=786840) # the seed is for the lab only
myResultsObs <- siena07(modelOptionsSim1, data=myDataSim,
                        effects=simEffects, returnDeps=TRUE)
myResultsObs

# set initial values for simulations based on estimated model,
# and fix them so siena07 simulates rather than re-estimates
simEffects$initialValue[simEffects$include==TRUE] <- myResultsObs$theta
simEffects$fix[simEffects$include==TRUE] <- TRUE
simEffects

# manipulate strength of peer influence to be much larger
simEffectsHiPI <- simEffects
simEffectsHiPI <- setEffect(simEffectsHiPI, avSim, interaction1='friends2',
                            name='drinking2', initialValue=12, fix=TRUE)
simEffectsHiPI

# make peer influence much lower
simEffectsLoPI <- simEffects
simEffectsLoPI <- setEffect(simEffectsLoPI, avSim, interaction1='friends2',
                            name='drinking2', initialValue=.5, fix=TRUE)
simEffectsLoPI

nIter <- 50 # number of simulation runs per condition
# nsub=0 & simOnly=TRUE: no estimation, just n3 forward simulations
modelOptionsSim2 <- sienaAlgorithmCreate(projname='lab170526sim',
                                         MaxDegree=c(friends2=6), nsub=0, n3=nIter,
                                         simOnly=TRUE, seed=4) # the seed is for the lab only
myResultsSimHiPI <- siena07(modelOptionsSim2, data=myDataSim, effects=simEffectsHiPI,
                            returnDeps=TRUE, returnChains=TRUE)
myResultsSimLoPI <- siena07(modelOptionsSim2, data=myDataSim, effects=simEffectsLoPI,
                            returnDeps=TRUE, returnChains=TRUE)

# extract mean drinking from simulation runs
# vectors to store means (myResultsObs used the default n3=1000 runs)
meanSimO <- rep(0, 1000)     # observed level of peer influence
meanSimHiPI <- rep(0, nIter) # high peer influence
meanSimLoPI <- rep(0, nIter) # low peer influence
# simulations using estimated parameters
# (FIX: spell out $drinking2 -- the original's $drinking only worked
# through partial matching of the list name)
for (m in 1:1000) {
  meanSimO[m] <- mean( myResultsObs$sims[[m]][[1]]$drinking2[[1]] )
}
# simulations that manipulate peer influence
for (m in seq_len(nIter)) {
  meanSimHiPI[m] <- mean( myResultsSimHiPI$sims[[m]][[1]]$drinking2[[1]] )
  meanSimLoPI[m] <- mean( myResultsSimLoPI$sims[[m]][[1]]$drinking2[[1]] )
}
meanSimHiPI
meanSimLoPI

# store observed mean drinking at time 3
meanObs <- mean(s50a[,3])

# compare the three conditions against the observed t3 mean
toPlot <- rbind( data.frame(cond="Observed", mean=meanSimO),
                 data.frame(cond="High", mean=meanSimHiPI),
                 data.frame(cond="Low", mean=meanSimLoPI) )
boxplot(mean ~ cond, data=toPlot, main="Mean Drinking",
        xlab="Value of Peer Influence", ylab="Mean Drinking")
abline(h=meanObs)

# examine simulated drinking change in greater detail
InitSim <- myResultsSimHiPI
nstudents <- dim(s50a)[1]
Zs5 <- array(NA, dim=c(nstudents,1)) # per-run end-state drinking scores
Zb5 <- array(0, dim=c(nIter,1))      # per-run mean drinking
for (m in seq_len(nIter)) {
  for (n in seq_len(nstudents)) {
    Zs5[n,1] <- InitSim$sims[[m]][[1]]$drinking2[[1]][[n]]
  }
  Zb5[m] <- colSums(Zs5, na.rm=TRUE)/nstudents
}
Zb5 # average drinkers in simulation
colMeans(s50a, na.rm=TRUE) # observed t1: 2.88, t2: 3.1, t3: 3.36
colMeans(Zb5) # average sim b is 3.2
mean( InitSim$sims[[nIter]][[1]]$drinking2[[1]] ) # mean of last (50th) simulation run: 3.02

# plot mean simulated drinking over time
# Each chain row is one micro step; column 2 flags the dependent variable
# ("0" = network, "1" = behavior) and column 6 holds the behavior change (+1/0/-1).
chainNum <- 50
datChain <- t(matrix(unlist(InitSim$chain[[chainNum]][[1]][[1]]),
                     ncol=length(InitSim$chain[[chainNum]][[1]][[1]])))
datBehavChain <- datChain[datChain[,2]=="1",] # condense to behavior change opportunities
( nBehavChanges <- dim(datBehavChain)[1] ) # number of behavior change opportunities
# FIX: preallocate (the original assigned a scalar and grew the vector)
meanSeqB <- numeric(nBehavChanges + 1) # only record behavior changes
meanSeqB[1] <- mean(s50a[,1]) # set t1 drinking mean to observed level
for (i in seq_len(nBehavChanges)) {
  meanSeqB[i+1] <- meanSeqB[i] + (as.numeric(datBehavChain[i,6])/nstudents)
}
plot(x=seq_along(meanSeqB), y=meanSeqB, type="l",
     ylab='drinking mean', xlab='time', main='mean drinking over time')

# plot mean simulated drinking over time, with NETWORK change opportunities included (a better approach)
chainNum <- 50
datChain <- t(matrix(unlist(InitSim$chain[[chainNum]][[1]][[1]]),
                     ncol=length(InitSim$chain[[chainNum]][[1]][[1]])))
( nChanges <- dim(datChain)[1] )
meanSeqF <- numeric(nChanges + 1) # full sequence, whether behavior changes or not
meanSeqF[1] <- mean(s50a[,1]) # set t1 drinking mean to observed level
for (i in seq_len(nChanges)) {
  if (datChain[i,2]=="0") { meanSeqF[i+1] <- meanSeqF[i] } # network micro step: no change
  if (datChain[i,2]=="1") {
    meanSeqF[i+1] <- meanSeqF[i] + (as.numeric(datChain[i,6])/nstudents)
  }
}
plot(x=seq_along(meanSeqF), y=meanSeqF, type="l",
     ylab='drinking mean', xlab='micro step', main='mean drinking over time')

# extract mean drinking for all 50 chains, with network change opportunities included
simChanges <- rep(0, nIter)
for (i in seq_len(nIter)) { simChanges[i] <- length(InitSim$chain[[i]][[1]][[1]]) }
maxChanges <- max(simChanges)
# recycle Zb5 so every cell starts at the run's final mean, then overwrite
# columns up to each run's own chain length
seqs <- matrix(Zb5, nrow=nIter, ncol=maxChanges+1) # fill matrix with final mean
seqs[,1] <- mean(s50a[,1]) # set t1 drinking mean to observed level
for (i in seq_len(nIter)) {
  datChain <- t(matrix(unlist(InitSim$chain[[i]][[1]][[1]]),
                       ncol=length(InitSim$chain[[i]][[1]][[1]])))
  for (j in seq_len(simChanges[i])) {
    if (datChain[j,2]=="0") { seqs[i,j+1] <- seqs[i,j] }
    if (datChain[j,2]=="1") {
      seqs[i,j+1] <- seqs[i,j] + (as.numeric(datChain[j,6])/nstudents)
    }
  }
}

# plot the first 25 iterations
plot(x=seq_len(ncol(seqs)), y=seqs[1,], ylim=c(min(seqs),max(seqs)), type="l",
     ylab='drinking mean', xlab='micro step', main='mean drinking over time')
for (i in 1:25) {
  lines(x=seq_len(ncol(seqs)), y=seqs[i,], col=colors()[i*10])
}
# not very pretty, so let's try another approach

# plot one iteration using loess curve
micros <- seq_len(ncol(seqs))
lo1 <- loess(seqs[1,] ~ micros)
l1 <- predict(lo1, micros)
plot(x=seq_len(ncol(seqs)), y=seqs[1,], ylim=c(min(seqs),max(seqs)), type="l",
     ylab='drinking mean', xlab='micro step', main='mean drinking over time')
lines(x=micros, y=l1, col='blue')

# plot multiple iterations using loess curves
plot(x=seq_len(ncol(seqs)), y=seqs[1,], ylim=c(min(seqs),max(seqs)), type="l",
     ylab='drinking mean', xlab='micro step', col='white',
     main='mean drinking over time')
for (i in 1:25) {
  lines(x=micros, y=predict(loess(seqs[i,] ~ micros), micros), col=colors()[i*10])
}