#this program demonstrates some of the network randomization bits and
#example models.
#Moody, 5.16.2018;
#clear everything to start...
rm(list = ls())
gc()
#load basic data manipulation bits
library(dplyr);
library(readr);
library(magrittr) #Supports pipe (%>%) commands that allow you to perform multiple operations with one statement
library(tidyr) #Additional functions for manipulating data
library(ggplot2)
#load the data - this is the same code as before
#first build the edgelist & nodelist info
setwd("C:/SNH18wd")
AHS_Base <- read_csv ('ahs_wpvar.csv',
col_names = TRUE);
AHS_adjlist <- AHS_Base %>%
select(ego_nid, mfnid_1:mfnid_5, ffnid_1:ffnid_5, grade, sex, PSMOKES,commcnt) %>%
filter(commcnt==1);
AHS_Edges <- AHS_adjlist %>%
rename( id = `ego_nid`,
gender = `sex`) %>%
gather(Alter_Label, Target, mfnid_1:mfnid_5, ffnid_1:ffnid_5, na.rm = TRUE)
AHS_Edges=AHS_Edges %>% filter (Target != 99999);
AHS_Edges=AHS_Edges %>%select(id, Target);
#install.packages("statnet")
library(statnet)
g=as.network(AHS_Edges)
g %v% "grade" <- AHS_adjlist$grade
g %v% "sex" <- AHS_adjlist$sex
g %v% "smokes" <- AHS_adjlist$PSMOKES
# get degree scores
outdegree <- degree(g, cmode = "outdegree")
indegree<- degree(g, cmode = "indegree")
g %v% "indegree" <- indegree
g %v% "outdegree" <- outdegree
g %v% "degree" <- degree(g)
plot(g, vertex.col="grade")
plot(g, vertex.col="sex")
#how transitive is our network?
obs_tran=gtrans(g)
obs_tran
#distribution of dyads
dyads=dyad.census(g)
dyads
#to simulate...first initialize an empty vector
simsize=500;
ran_tran=vector(length=simsize)
#loop for the simulation...
j=1
while(j<(simsize+1)){
r=rguman(1,nv=71,mut=dyads[1],asym=dyads[2],null=dyads[3])
ran_tran[j]=gtrans(r)
if (is.na(ran_tran[j])) next else j=j+1 #skip bad sim cases
}
dt=as.data.frame(ran_tran)
ggplot(dt,aes(x=ran_tran))+geom_histogram()+
geom_vline(xintercept=obs_tran,
linetype=1,
color="red")
#perhaps the clustering observed is due to general categorical mixing,
#by grade or sex?
grades=as.character(g %v% "grade")
grdmix=mixingmatrix(g, "grade")
grdmix
#should be able to do this with the RGNMIX function, but that's
#not working, so let's do the same thing by hand.
#here's my failed code for the build in function,
################
#to simulate...first initialize an empty vector
#simsize=500;
#ran_tran=vector(length=simsize)
#loop for the simulation...
#j=1
#while(j<(simsize+1)){
# ran_g<-rgnmix(1,grades,grdmix$matrix,method="exact",return.as.edgelist=TRUE)
# ran_tran[j]=gtrans(as.network(ran_g))
# j=j+1
# if (is.na(ran_tran[j])) next else j=j+1 #skip bad sim cases
#}
#dt=as.data.frame(ran_tran)
#ggplot(dt,aes(x=ran_tran))+geom_histogram()+
# geom_vline(xintercept=obs_tran,
# linetype=1,
# color="red")
#now do it by hand. This code steals directly from some stuff
#jake fisher put together in 2017 session.
#1. get a probability matrix from the mixing matrix...need a denominator
c=as.matrix(table(grades))
cells=c%*%t(c)
mixprob=grdmix$matrix/cells
mixprob
#note this is technically not quite right...as we should subtractk
#1 from the diagonal...but I'm being lazy...
# 2. Now create an empty matrix:
prob.matrix <- matrix(NA, ncol = network.size(g),
nrow = network.size(g))
# 3. ...fill in the values based on the mixing matrix. Going to
#so get the categories...
grade.values <- sort(unique(grades))
# The general idea is to do this...
#prob.matrix[which(grades == 7), which(grades == 7)] <- tie.probs[1, 1]
# ...but that's going to require 36 lines of code. Let's write a loop instead.
for (i in 1:nrow(mixprob)) {
for (j in 1:ncol(mixprob)) {
prob.matrix[which(grades == grade.values[i]),
which(grades == grade.values[j])] <- mixprob[i, j]
}
}
#now we have a matrix where each ij cell is the probabilty of a
#tie based on mixing...so just do random bernulli draws from that,
#using the same code strucure as above...
simsize=500;
ran_tran=vector(length=simsize)
#loop for the simulation...
j=1
while(j<(simsize+1)){
r=rgraph(n = network.size(g), tprob = prob.matrix)
ran_tran[j]=gtrans(r)
if (is.na(ran_tran[j])) next else j=j+1 #skip bad sim cases
}
dt=as.data.frame(ran_tran)
ggplot(dt,aes(x=ran_tran))+geom_histogram()+
geom_vline(xintercept=obs_tran,
linetype=1,
color="red")
#could do the same thing by Sex, but the plot we had suggested it's
#not strongly clustered by sex...but you get the idea...
#also, would be more efficient to create a sample of graphs, store them
#then you can test a bunch of stats agains the same set...but transitivyt
#is cheap to calculate...so using that...
plot(hist(ran_tran),xlim = c(0, .5))
abline(v = obs_tran, col = "red")
# Now we pass both of those to the function in igraph
#one quick test...
r=igraph::sample_degseq(out.deg = outdegree, in.deg = indegree,method = "simple.no.multiple")
igraph::transitivity(r)
igraph::plot.igraph(r)
#same loop bit...could just write a function for this...
simsize=50;
ran_tran=vector(length=simsize)
#loop for the simulation...note I use the igraph transitivity function
j=1
while(j<(simsize+1)){
r=igraph::sample_degseq(out.deg = outdegree, in.deg = indegree,method = "simple.no.multiple")
ran_tran[j]=igraph::transitivity(r)
if (is.na(ran_tran[j])) next else j=j+1 #skip bad sim cases
}
dt=as.data.frame(ran_tran)
ggplot(dt,aes(x=ran_tran))+geom_histogram()+
geom_vline(xintercept=obs_tran,
linetype=1,
color="red")
#again...not even close. But, all this is binary; let's build a model
#to look at this with multiple conditionals.
####################
# ERGM
####################
#ergm code adapted from Jeff Smith's tutorial.
#clean house...
rm(c,cells, dt, dyads, grdmix,prob.matrix,r)
rm(grade.values,grades,i,j,mixprob,obs_tran,rant_tran,simsize)
#super simple model..
mod.rand<-ergm(g~edges)
summary(mod.rand)
#interpretation of model coefficients:
#the log-odds of any tie existing is -2.7275
#probability: exp(-2.7275)/(1+exp(-2.7275))=.0613
exp(-2.7275)/(1+exp(-2.7275))
#which compares to density as..
gden(g)
#How good is the model?
plot(gof(mod.rand))
#add some covariates...
mod.homoph<-ergm(g~edges+nodematch("sex")+nodematch("grade")
+nodematch('smokes'))
summary(mod.homoph)
mod.gof=gof(mod.homoph)
plot(mod.gof)
#add some simple volume bitscovariates...
mod.p1<-ergm(g~edges+nodematch("sex")+nodematch("grade")
+nodematch('smokes')+sender+receiver)
summary(mod.p1)
mod.gof=gof(mod.p1)
plot(mod.gof)
#model predicts better, but huge BIC cost! Simplify?
g %v% "indegree" <- indegree
g %v% "outdegree" <- outdegree
mod.p2<-ergm(g~edges+nodematch("sex")+nodematch("grade")
+nodematch('smokes')+nodeicov("indegree")+nodeocov("outdegree")
+mutual)
summary(mod.p2)
mod.gof=gof(mod.p2)
plot(mod.gof)
mod.gendmut<-ergm(g~edges+nodematch("sex")+nodematch("grade")
+nodematch('smokes')+nodeicov("indegree")+nodeocov("outdegree")
+mutual(same="sex",diff=TRUE))
summary(mod.gendmut)
#let's try a transitivity term...
# mod.tran<-ergm(g~edges+nodematch("sex")+nodematch("grade")
# +nodematch('smokes')+nodeicov("indegree")+nodeocov("outdegree")
# +mutual+triangle)
# summary(mod.gendmut)
#turns out that won't run...lets cheat!
mod.tran<-ergm(g~edges+nodematch("sex")+nodematch("grade")
+nodematch('smokes')+nodeicov("indegree")+nodeocov("outdegree")
+mutual+triangle, control=control.ergm(MCMLE.maxit=2))
summary(mod.tran)
mcmc.diagnostics(mod.tran)
test=simulate.ergm(mod.tran,1)
plot(test)
mod.gwesp<-ergm(g~edges+nodematch("sex")+nodematch("grade")
+nodematch('smokes')+nodeicov("indegree")+nodeocov("outdegree")
+mutual+gwesp)
summary(mod.gwesp)
mcmc.diagnostics(mod.gwesp)
test=simulate.ergm(mod.gwesp,1)
plot(test)
plot(gof(mod.gwesp))
################################################################
#latent space models. These models allow us to fit a network without
#having to specify the actual tie formation patterns...
#code cribbed from Jake Fisher
#################################################################
# The most user-friendly latent space model software is in the latentnet package
# (more recent models are provided by the amen package).
#install.packages("latentnet")
library(latentnet)
start.time <- Sys.time()
latent.fit <- ergmm(g ~ euclidean(d = 2))
runtime=Sys.time()-start.time;
runtime;
summary(latent.fit)
plot(latent.fit)
mcmc.diagnostics(latent.fit)
plot(gof(latent.fit))
# Can add additional dimensions...
start.time <- Sys.time()
latent.fit <- ergmm(g ~ euclidean(d = 3))
runtime=Sys.time()-start.time;
runtime;
plot(gof(latent.fit))
# ... latent groups ...
start.time <- Sys.time()
latent.fit <- ergmm(g ~ euclidean(d = 2, G = 5))
runtime=Sys.time()-start.time;
runtime;
plot(latent.fit)
plot(gof(latent.fit))
# ... or homophily effects
start.time <- Sys.time()
latent.fit <- ergmm(g ~ nodematch("grade") + nodematch("sex")+euclidean(d = 2))
runtime=Sys.time()-start.time;
runtime
summary(latent.fit)