PEER INFLUENCE & DIFFUSION LAB | Duke Network Analysis Center

Download R source file

              ### 2016 DNAC Social Networks & Health Workshop 
### Peer Influence & Diffusion lab 
### Author: Jaemin Lee, Duke University

### Part 1. STATIC NETWORKS - A Simple SI Simulation on a Toy Graph from Scratch
setwd("your working directory")  # Save "comm1.net" in this directory! We will later read in that data.
library(igraph)

# Make a toy tree-like network with 50 nodes
size=50 
g = graph.tree(size, children = 2, mode="undirected")
plot(g)

# Initialize the diffusers
seeds_num = 1 # Let's begin with 1 seed for simplicity
set.seed(2016); diffusers = sample(1:vcount(g),seeds_num); diffusers # Randomly choose the seed
diffusers # Who is it?
g[[diffusers,]] # Who's connected to it?
nearest_neighbors<-unlist(g[[diffusers,]])

p=0.4 # let's say dyadic transmission probability is .4
sample(c(1,0),length(nearest_neighbors),replace=T, prob=c(p,1-p)) # Draw 1 or 0 based on p
is.infected<-sample(c(1,0),length(nearest_neighbors),replace=T, prob=c(p,1-p))
is.infected
new.infected<-nearest_neighbors[which(is.infected==1)] # Get IDs for newly infected nodes among neighbors
diffusers<-c(diffusers,new.infected) # Update diffusers
diffusers
# Visualize it
V(g)$color = "white"
V(g)$color[V(g)%in%diffusers] = "red"
plot(g)

## Repeat this procedure by making a loop

# function for updating the spreaders
update_diffusers = function(diffusers){
  nearest_neighbors = unlist(ego(g, 1, diffusers))
  nearest_neighbors = setdiff(nearest_neighbors, diffusers)
  n=length(nearest_neighbors)
  is.infected=sample(c(1,0),n,replace=T, prob=c(p,1-p))
  new_infected = nearest_neighbors[which(is.infected==1)]
  diffusers = unique(c(diffusers, new_infected))
  return(diffusers)
}

infected=list() # Initialize a list to record who's infected
infected[[1]] = sample(1:vcount(g),seeds_num) # First seed


# Start the contagion processes

i = 1
while(length(infected[[i]]) < size){ 
  infected[[i+1]] = update_diffusers(infected[[i]]) %>% sort()
  print(paste("Tick:",i,"Infected:",length(infected[[i]])))
  i = i + 1
}

## Visualizations
# Diffusion curve
num_cum = lapply(1:i, function(x) length(infected[[x]])) %>% unlist()
p_cum = num_cum/max(num_cum)
time = 1:i
plot(main="Proportion infected",p_cum~time, type = "b") # type "b" -- plotting Both lines and points


# Network visualization

l<-layout.fruchterman.reingold(g) # to fix coordinates of nodes

# Snapshot 
par(mfrow=c(2,2), mar=c(1,1,1,1)) # combine multiple plots
plot(g, main="Day 1", vertex.color=ifelse(V(g)%in%infected[[1]],"tomato","cadetblue3"), layout=l, vertex.label=NA)
plot(g, main="Day 8", vertex.color=ifelse(V(g)%in%infected[[8]],"tomato","cadetblue3"), layout=l, vertex.label=NA)
plot(g, main="Day 16", vertex.color=ifelse(V(g)%in%infected[[16]],"tomato","cadetblue3"), layout=l, vertex.label=NA)
plot(g, main=paste("Day",i), vertex.color=ifelse(V(g)%in%infected[[i]],"tomato","cadetblue3"), layout=l, vertex.label=NA)

dev.off() # clear plot deck..

# Let's go dynamic
install.packages("animation")
library(animation)

saveGIF({
  # start the plot
  m = 1
  while(m <= length(infected)){
    V(g)$color = "cadetblue3"
    V(g)$color[V(g)%in%infected[[m]]] = "tomato"
    day<-paste("Day",m)
    plot(g, edge.color="gray85", vertex.frame.color="white", vertex.size=10, edge.width=3, layout =l)
    title(day, cex.main=2.5, col.main="dodgerblue4")
    m = m + 1}
},
interval = .8, ani.width = 640, ani.height = 640, movie.name="endemic_animation.gif")


# Let's go geeky: netlogo-like
saveGIF({
  # start the plot
  m = 1
  while(m <= length(infected)){
    par(mfrow=c(1,2))
    V(g)$color = "cadetblue3"
    V(g)$color[V(g)%in%infected[[m]]] = "tomato"
    day<-paste("Day",m)
    plot(g, edge.color="gray85", vertex.frame.color="white", vertex.size=10, edge.width=3, layout =l)
    title(day, cex.main=1, col.main="dodgerblue4")
    num_cum = unlist(lapply(1:m, function(x) length(infected[[x]]) ))
    p_cum = num_cum/size
    time = 1:m
    plot(p_cum~time, type = "b", ylab = "Proportion Infected", xlab = "Time",
         xlim = c(0,i), ylim =c(0,1), frame.plot = FALSE)
    m = m + 1}
},
interval = .5, ani.width = 800, ani.height = 500, movie.name="netlogo_animation.gif")

### PART 2: STATIC NETWORKS - DIFFUSION OVER REAL VS RANDOM NETWORKS 
## We are going to load one Add Health school network
library(ggplot2)
comm1<-read.graph("comm1.net", format="pajek") %>% as.undirected() %>% simplify()
comm1<-delete.vertices(comm1, which(degree(comm1)==0)) # Remove two isolates from the graph
plot(comm1)

## Key question is whether diffusion processes differ between (commonly observed) clustered networks 
## and random networks, given the same degree distribution -- in terms of its speed and extent, in particular.

# Generate random networks, matching degree distributions (and thus density)
hist(degree(comm1))
mean(degree(comm1))
ran.net<-sample_degseq(degree(comm1), method="simple.no.multiple") # function to generate random graphs with a given degree sequence
?sample_degseq
# Let's see how it looks
plot(ran.net)
graph.density(comm1)
graph.density(ran.net)
degree(comm1)==degree(ran.net)


# Let's make a list of 1 school net and 5 random nets
Net<-list(comm1)
Net[[2]]<-sample_degseq(degree(comm1), method="simple.no.multiple")
Net[[3]]<-sample_degseq(degree(comm1), method="simple.no.multiple")
Net[[4]]<-sample_degseq(degree(comm1), method="simple.no.multiple")
Net[[5]]<-sample_degseq(degree(comm1), method="simple.no.multiple")
Net[[6]]<-sample_degseq(degree(comm1), method="simple.no.multiple")

size<-vcount(comm1) # 69 nodes

## Now we have a total of six networks. We are going to run simple contagion on them, like we did on the toy network
# On the real school network: 50 trials
# On each random network: 10 trials (10 X 5 = 50 trials)

# Same function for updating the diffusers
update_diffusers = function(diffusers){
  nearest_neighbors = unlist(ego(g, 1, diffusers))
  nearest_neighbors = setdiff(nearest_neighbors, diffusers)
  n=length(nearest_neighbors)
  is.infected=sample(c(1,0),n,replace=T, prob=c(p,1-p))
  new_infected = nearest_neighbors[which(is.infected==1)]
  diffusers = unique(c(diffusers, new_infected))
  return(diffusers)
}

record<-data.frame() # initialize a data frame to record infection rate on the fly
seeds<-sample(1:size, 50) # randomly draw 50 seeds -- same seeds to be used both for real and random networks.
p = 0.2 # this time we set dyadic transmission probability as .2

# 1. Repeat 50 times for a real net, p fixed at .2
g<-Net[[1]]
for(t in 1:50){
  infected=list()
  infected[[1]] = seeds[t]
  for(i in 1:40){
    if(length(infected[[i]])<size){ infected[[i+1]]="update_diffusers(infected[[i]])" %="">% sort()
      p_cum = length(infected[[i]])/size
      record<-rbind(record, c(1, t, i, p_cum))
    }
    else{
      infected[[i+1]]<-infected[[i]]
      record<-rbind(record, c(1, t, i, 1))
    }
  }
  print(paste("trial:",t))
}

head(record) # It stores network number, # trial, tick, cumulative infection rate

# 2. Repeat 10 times for each random networks (10 X 5 = 50), p fixed at .2
for(n in 2:length(Net)){
  g<-Net[[n]]
  k=n-1
  seed<-seeds[(10*k-9):(10*k)]
  for(t in 1:10){
    infected=list()
    infected[[1]] = seed[t]
    for(i in 1:40){
      if(length(infected[[i]])<size){ infected[[i+1]]="update_diffusers(infected[[i]])" %="">% sort()
        p_cum = length(infected[[i]])/size
        record<-rbind(record, c(n, t, i, p_cum))
      }
      else{
        infected[[i+1]]<-infected[[i]]
        record<-rbind(record, c(n, t, i, 1))
      }
    }
    print(paste("Network:",k,"Trial:",t))
  }
}

## Simulations done! Let's work on the summary data frame
colnames(record)<-c("net","trial","tick","cum.infected") # Sensible variable names

# We want to calculate average proportion infected across time points -- school / random networks separately 
# School net
sch.rec<-record[which(record$net==1),]
sch.rec<-aggregate(sch.rec$cum.infected,list(sch.rec$tick),mean)
sch.rec$type<-"School Net"
# Random nets
ran.rec<-record[which(record$net!=1),]
ran.rec<-aggregate(ran.rec$cum.infected,list(ran.rec$tick),mean)
ran.rec$type<-"Random Net"

all.rec<-rbind(sch.rec,ran.rec) # Stack the two mean data sets

ggplot(all.rec, aes(Group.1,x,colour=type))+geom_line()+xlab("Time")+ylab("Proportion Infected (Mean)")

### PART 3: DYNAMIC NETWORKS - EpiModel
install.packages("EpiModel",dependencies = TRUE)
install.packages("ndtv")
library("EpiModel")
library("ndtv")

## 1. Network model estimation
N=50 # a network of 50 homogeneous nodes, with no edges between them at the start
nw <- network.initialize(n = N, directed = FALSE)

formation <- ~edges+concurrent # Formula: Our sexual networks will be formed by two parameters: how dense / how frequent people cheat on
mean.edge<-1 # mean edge = 1 in our population
mean.con<-.22 # 22% concurrency = overlapping sexual partnerships
target.stats<-c(mean.edge*(N/2),mean.con*N) # Target statistics based on our specification

# Another parameter: duration. Homogeneous probability of dissolution -- partnerships last avg. 10 time steps
coef.diss <- dissolution_coefs(dissolution = ~offset(edges), duration = 10) 

est <- netest(nw, formation, target.stats, coef.diss, verbose = FALSE) # Network model estimation based on edge, concurrency, duration parameters.

## 2. Epidemic simulation
## Epidemeic model parameters
# 1. Infection probability = the risk of transmission given contact with an infected person. 
# 2. Act rate = the number of sexual acts between that occur within a partnership each time unit
# overall frequency of acts per person per unit time is a function of the incidence rate of partnerships and this act parameter.
# 3. Recovery rate = the speed at which infected persons become susceptible again. 
param <- param.net(inf.prob = 0.8, act.rate = 2, rec.rate=0.01)
init <- init.net(i.num = 2) # seed
control <- control.net(type = "SIS", nsteps = 30, nsims = 1, verbose = FALSE)
sim <- netsim(est, param, init, control)

# Dynamic network visualization
nw <- get_network(sim)
nw <- color_tea(nw, verbose = FALSE)

slice.par <- list(start = 1, end = 30, interval = 1, 
                  aggregate.dur = 1, rule = "any")
render.par <- list(tween.frames = 10, show.time = FALSE)
plot.par <- list(mar = c(0, 0, 0, 0))

compute.animation(nw, slice.par = slice.par)

render.d3movie(
  nw, 
  render.par = render.par, 
  plot.par = plot.par,
  vertex.cex = 1.5, 
  vertex.col = "ndtvcol", 
  edge.col = "darkgrey", 
  vertex.border = "lightgrey",
  displaylabels = FALSE,
  #filename = paste0(getwd(), "/movie.html")
)

## 3. How much does concurrency matter?
# We simulate three times, allowing concurrency parameters to vary
N=500
nw <- network.initialize(n = N, directed = FALSE)
formation <- ~edges
mean.edge<-0.7
target.stats<-mean.edge*(N/2)
coef.diss <- dissolution_coefs(dissolution = ~offset(edges), duration = 100) 
est <- netest(nw, formation, target.stats, coef.diss, verbose = FALSE)
param <- param.net(inf.prob = 0.5, act.rate = 2, rec.rate=0.01)
init <- init.net(i.num = 10)
control <- control.net(type = "SIS", nsteps = 500, nsims = 5, verbose = FALSE)
sim <- netsim(est, param, init, control)


formation.concur <- ~edges+concurrent
mean.con<-.2
target.stats<-c(mean.edge*(N/2),mean.con*N)
est <- netest(nw, formation.concur, target.stats, coef.diss, verbose = FALSE)
sim.concur.1 <- netsim(est, param, init, control)

mean.con<-.25
target.stats<-c(mean.edge*(N/2),mean.con*N)
est <- netest(nw, formation.concur, target.stats, coef.diss, verbose = FALSE)
sim.concur.2 <- netsim(est, param, init, control)

par(mfrow=c(1,3))
plot(sim)
plot(sim.concur.1)
plot(sim.concur.2)

# I acknowledge that these web resources were invaluably helpful in making this tutorial script:
# http://chengjun.github.io/en/2014/03/simulate-network-diffusion-with-R/
# http://statnet.github.io/sb/t1.html            </size){></size){>