### 2016 DNAC Social Networks & Health Workshop ### Peer Influence & Diffusion lab ### Author: Jaemin Lee, Duke University ### Part 1. STATIC NETWORKS - A Simple SI Simulation on a Toy Graph from Scratch setwd("your working directory") # Save "comm1.net" in this directory! We will later read in that data. library(igraph) # Make a toy tree-like network with 50 nodes size=50 g = graph.tree(size, children = 2, mode="undirected") plot(g) # Initialize the diffusers seeds_num = 1 # Let's begin with 1 seed for simplicity set.seed(2016); diffusers = sample(1:vcount(g),seeds_num); diffusers # Randomly choose the seed diffusers # Who is it? g[[diffusers,]] # Who's connected to it? nearest_neighbors<-unlist(g[[diffusers,]]) p=0.4 # let's say dyadic transmission probability is .4 sample(c(1,0),length(nearest_neighbors),replace=T, prob=c(p,1-p)) # Draw 1 or 0 based on p is.infected<-sample(c(1,0),length(nearest_neighbors),replace=T, prob=c(p,1-p)) is.infected new.infected<-nearest_neighbors[which(is.infected==1)] # Get IDs for newly infected nodes among neighbors diffusers<-c(diffusers,new.infected) # Update diffusers diffusers # Visualize it V(g)$color = "white" V(g)$color[V(g)%in%diffusers] = "red" plot(g) ## Repeat this procedure by making a loop # function for updating the spreaders update_diffusers = function(diffusers){ nearest_neighbors = unlist(ego(g, 1, diffusers)) nearest_neighbors = setdiff(nearest_neighbors, diffusers) n=length(nearest_neighbors) is.infected=sample(c(1,0),n,replace=T, prob=c(p,1-p)) new_infected = nearest_neighbors[which(is.infected==1)] diffusers = unique(c(diffusers, new_infected)) return(diffusers) } infected=list() # Initialize a list to record who's infected infected[[1]] = sample(1:vcount(g),seeds_num) # First seed # Start the contagion processes i = 1 while(length(infected[[i]]) < size){ infected[[i+1]] = update_diffusers(infected[[i]]) %>% sort() print(paste("Tick:",i,"Infected:",length(infected[[i]]))) i = i + 1 } ## Visualizations # Diffusion curve num_cum = lapply(1:i, function(x) length(infected[[x]])) %>% unlist() p_cum = num_cum/max(num_cum) time = 1:i plot(main="Proportion infected",p_cum~time, type = "b") # type "b" -- plotting Both lines and points # Network visualization l<-layout.fruchterman.reingold(g) # to fix coordinates of nodes # Snapshot par(mfrow=c(2,2), mar=c(1,1,1,1)) # combine multiple plots plot(g, main="Day 1", vertex.color=ifelse(V(g)%in%infected[[1]],"tomato","cadetblue3"), layout=l, vertex.label=NA) plot(g, main="Day 8", vertex.color=ifelse(V(g)%in%infected[[8]],"tomato","cadetblue3"), layout=l, vertex.label=NA) plot(g, main="Day 16", vertex.color=ifelse(V(g)%in%infected[[16]],"tomato","cadetblue3"), layout=l, vertex.label=NA) plot(g, main=paste("Day",i), vertex.color=ifelse(V(g)%in%infected[[i]],"tomato","cadetblue3"), layout=l, vertex.label=NA) dev.off() # clear plot deck.. # Let's go dynamic install.packages("animation") library(animation) saveGIF({ # start the plot m = 1 while(m <= length(infected)){ V(g)$color = "cadetblue3" V(g)$color[V(g)%in%infected[[m]]] = "tomato" day<-paste("Day",m) plot(g, edge.color="gray85", vertex.frame.color="white", vertex.size=10, edge.width=3, layout =l) title(day, cex.main=2.5, col.main="dodgerblue4") m = m + 1} }, interval = .8, ani.width = 640, ani.height = 640, movie.name="endemic_animation.gif") # Let's go geeky: netlogo-like saveGIF({ # start the plot m = 1 while(m <= length(infected)){ par(mfrow=c(1,2)) V(g)$color = "cadetblue3" V(g)$color[V(g)%in%infected[[m]]] = "tomato" day<-paste("Day",m) plot(g, edge.color="gray85", vertex.frame.color="white", vertex.size=10, edge.width=3, layout =l) title(day, cex.main=1, col.main="dodgerblue4") num_cum = unlist(lapply(1:m, function(x) length(infected[[x]]) )) p_cum = num_cum/size time = 1:m plot(p_cum~time, type = "b", ylab = "Proportion Infected", xlab = "Time", xlim = c(0,i), ylim =c(0,1), frame.plot = FALSE) m = m + 1} }, interval = .5, ani.width = 800, ani.height = 500, movie.name="netlogo_animation.gif") ### PART 2: STATIC NETWORKS - DIFFUSION OVER REAL VS RANDOM NETWORKS ## We are going to load one Add Health school network library(ggplot2) comm1<-read.graph("comm1.net", format="pajek") %>% as.undirected() %>% simplify() comm1<-delete.vertices(comm1, which(degree(comm1)==0)) # Remove two isolates from the graph plot(comm1) ## Key question is whether diffusion processes differ between (commonly observed) clustered networks ## and random networks, given the same degree distribution -- in terms of its speed and extent, in particular. # Generate random networks, matching degree distributions (and thus density) hist(degree(comm1)) mean(degree(comm1)) ran.net<-sample_degseq(degree(comm1), method="simple.no.multiple") # function to generate random graphs with a given degree sequence ?sample_degseq # Let's see how it looks plot(ran.net) graph.density(comm1) graph.density(ran.net) degree(comm1)==degree(ran.net) # Let's make a list of 1 school net and 5 random nets Net<-list(comm1) Net[[2]]<-sample_degseq(degree(comm1), method="simple.no.multiple") Net[[3]]<-sample_degseq(degree(comm1), method="simple.no.multiple") Net[[4]]<-sample_degseq(degree(comm1), method="simple.no.multiple") Net[[5]]<-sample_degseq(degree(comm1), method="simple.no.multiple") Net[[6]]<-sample_degseq(degree(comm1), method="simple.no.multiple") size<-vcount(comm1) # 69 nodes ## Now we have a total of six networks. We are going to run simple contagion on them, like we did on the toy network # On the real school network: 50 trials # On each random network: 10 trials (10 X 5 = 50 trials) # Same function for updating the diffusers update_diffusers = function(diffusers){ nearest_neighbors = unlist(ego(g, 1, diffusers)) nearest_neighbors = setdiff(nearest_neighbors, diffusers) n=length(nearest_neighbors) is.infected=sample(c(1,0),n,replace=T, prob=c(p,1-p)) new_infected = nearest_neighbors[which(is.infected==1)] diffusers = unique(c(diffusers, new_infected)) return(diffusers) } record<-data.frame() # initialize a data frame to record infection rate on the fly seeds<-sample(1:size, 50) # randomly draw 50 seeds -- same seeds to be used both for real and random networks. p = 0.2 # this time we set dyadic transmission probability as .2 # 1. Repeat 50 times for a real net, p fixed at .2 g<-Net[[1]] for(t in 1:50){ infected=list() infected[[1]] = seeds[t] for(i in 1:40){ if(length(infected[[i]])% sort() p_cum = length(infected[[i]])/size record<-rbind(record, c(1, t, i, p_cum)) } else{ infected[[i+1]]<-infected[[i]] record<-rbind(record, c(1, t, i, 1)) } } print(paste("trial:",t)) } head(record) # It stores network number, # trial, tick, cumulative infection rate # 2. Repeat 10 times for each random networks (10 X 5 = 50), p fixed at .2 for(n in 2:length(Net)){ g<-Net[[n]] k=n-1 seed<-seeds[(10*k-9):(10*k)] for(t in 1:10){ infected=list() infected[[1]] = seed[t] for(i in 1:40){ if(length(infected[[i]])% sort() p_cum = length(infected[[i]])/size record<-rbind(record, c(n, t, i, p_cum)) } else{ infected[[i+1]]<-infected[[i]] record<-rbind(record, c(n, t, i, 1)) } } print(paste("Network:",k,"Trial:",t)) } } ## Simulations done! Let's work on the summary data frame colnames(record)<-c("net","trial","tick","cum.infected") # Sensible variable names # We want to calculate average proportion infected across time points -- school / random networks separately # School net sch.rec<-record[which(record$net==1),] sch.rec<-aggregate(sch.rec$cum.infected,list(sch.rec$tick),mean) sch.rec$type<-"School Net" # Random nets ran.rec<-record[which(record$net!=1),] ran.rec<-aggregate(ran.rec$cum.infected,list(ran.rec$tick),mean) ran.rec$type<-"Random Net" all.rec<-rbind(sch.rec,ran.rec) # Stack the two mean data sets ggplot(all.rec, aes(Group.1,x,colour=type))+geom_line()+xlab("Time")+ylab("Proportion Infected (Mean)") ### PART 3: DYNAMIC NETWORKS - EpiModel install.packages("EpiModel",dependencies = TRUE) install.packages("ndtv") library("EpiModel") library("ndtv") ## 1. Network model estimation N=50 # a network of 50 homogeneous nodes, with no edges between them at the start nw <- network.initialize(n = N, directed = FALSE) formation <- ~edges+concurrent # Formula: Our sexual networks will be formed by two parameters: how dense / how frequent people cheat on mean.edge<-1 # mean edge = 1 in our population mean.con<-.22 # 22% concurrency = overlapping sexual partnerships target.stats<-c(mean.edge*(N/2),mean.con*N) # Target statistics based on our specification # Another parameter: duration. Homogeneous probability of dissolution -- partnerships last avg. 10 time steps coef.diss <- dissolution_coefs(dissolution = ~offset(edges), duration = 10) est <- netest(nw, formation, target.stats, coef.diss, verbose = FALSE) # Network model estimation based on edge, concurrency, duration parameters. ## 2. Epidemic simulation ## Epidemeic model parameters # 1. Infection probability = the risk of transmission given contact with an infected person. # 2. Act rate = the number of sexual acts between that occur within a partnership each time unit # overall frequency of acts per person per unit time is a function of the incidence rate of partnerships and this act parameter. # 3. Recovery rate = the speed at which infected persons become susceptible again. param <- param.net(inf.prob = 0.8, act.rate = 2, rec.rate=0.01) init <- init.net(i.num = 2) # seed control <- control.net(type = "SIS", nsteps = 30, nsims = 1, verbose = FALSE) sim <- netsim(est, param, init, control) # Dynamic network visualization nw <- get_network(sim) nw <- color_tea(nw, verbose = FALSE) slice.par <- list(start = 1, end = 30, interval = 1, aggregate.dur = 1, rule = "any") render.par <- list(tween.frames = 10, show.time = FALSE) plot.par <- list(mar = c(0, 0, 0, 0)) compute.animation(nw, slice.par = slice.par) render.d3movie( nw, render.par = render.par, plot.par = plot.par, vertex.cex = 1.5, vertex.col = "ndtvcol", edge.col = "darkgrey", vertex.border = "lightgrey", displaylabels = FALSE, #filename = paste0(getwd(), "/movie.html") ) ## 3. How much does concurrency matter? # We simulate three times, allowing concurrency parameters to vary N=500 nw <- network.initialize(n = N, directed = FALSE) formation <- ~edges mean.edge<-0.7 target.stats<-mean.edge*(N/2) coef.diss <- dissolution_coefs(dissolution = ~offset(edges), duration = 100) est <- netest(nw, formation, target.stats, coef.diss, verbose = FALSE) param <- param.net(inf.prob = 0.5, act.rate = 2, rec.rate=0.01) init <- init.net(i.num = 10) control <- control.net(type = "SIS", nsteps = 500, nsims = 5, verbose = FALSE) sim <- netsim(est, param, init, control) formation.concur <- ~edges+concurrent mean.con<-.2 target.stats<-c(mean.edge*(N/2),mean.con*N) est <- netest(nw, formation.concur, target.stats, coef.diss, verbose = FALSE) sim.concur.1 <- netsim(est, param, init, control) mean.con<-.25 target.stats<-c(mean.edge*(N/2),mean.con*N) est <- netest(nw, formation.concur, target.stats, coef.diss, verbose = FALSE) sim.concur.2 <- netsim(est, param, init, control) par(mfrow=c(1,3)) plot(sim) plot(sim.concur.1) plot(sim.concur.2) # I acknowledge that these web resources were invaluably helpful in making this tutorial script: # http://chengjun.github.io/en/2014/03/simulate-network-diffusion-with-R/ # http://statnet.github.io/sb/t1.html