#DESCRIPTIVE STATISTICS | Duke Network Analysis Center

#Download R source file

              #Social Networks and Health Training Program
#Descriptive Network Analyses
#Jonathan H. Morgan and Molly Copeland

#RESOURCES
  #Acton's and Jasny's Statnet Tutorial: https://statnet.org/trac/raw-attachment/wiki/Resources/introToSNAinR_sunbelt_2012_tutorial.pdf
  #Wasserman and Faust's (1994) book, Social Network Analysis: Methods and Applications

#Starting from a Clean Workspace
#Caution: rm(list = ls()) deletes every object in the current workspace; save anything you still need first.
rm(list = ls())
gc()

########################
#   LOADING PACKAGES   #
########################

library(plyr)
library(dplyr)
library(tidyr)
library(statnet)
library(ggplot2)
library(ggnetwork)
#library(igraph)                                  igraph and sna packages are not compatible. Run one or the other.

#To get more information about the sna and statnet packages
#The statnet package draws on the sna package to compute the majority of its descriptive network statistics.
help(package = "sna")
help(package = "statnet")

######################
#   IMPORTING DATA   #
######################

#A file chooser opens (look for its icon in your task bar): select today's data set, ahs_wpvar.csv
#An interactive chooser is used because participants may have varying levels of familiarity with the computer they are using.
AHS_WPVAR <- read.csv(file = file.choose(), header = TRUE)

#################################################
#   Creating School 7's Edgelist and Nodelist   # 
#################################################

#Step 1: Subsetting AHS_WPVAR to Isolate Schools (rows are filtered on the value of the first column)
#AHS_Community1 <- subset(AHS_WPVAR, AHS_WPVAR[[1]] == 1)
AHS_Community7 <- subset(AHS_WPVAR, AHS_WPVAR[[1]] == 7)

#Step 2: Creating the Data Subset for the Edgelist: column 3 (ego's ID) plus columns 4-8 and 14-18 (the nominations)
AHS_Edges <- AHS_Community7[, c(3:8, 14:18)]


#Step 3: Converting from wide to long format with tidyr: one row per (ego, nomination slot); empty slots are dropped
AHS_EdgeList <- gather(AHS_Edges, key = ID, value = value,
                       mfnid_1:mfnid_5, ffnid_1:ffnid_5, na.rm = TRUE)


#Step 4: Finishing the Edgelist
#Deleting the 99999 codes; the gather statement has already eliminated the other missing values.
#Dropping the nomination-slot column (ID), which is no longer needed.
#Renaming ego_nid to Sender and value to Target.
#Adding a weight variable of 1.

AHS_EdgeList <- subset(AHS_EdgeList, AHS_EdgeList[[3]] != 99999)   #We go from 2,659 edges to 2,099 edges
AHS_EdgeList[[2]] <- NULL
names(AHS_EdgeList)[1:2] <- c("Sender", "Target")
AHS_EdgeList[["weight"]] <- 1

#Step 5: Creating the Nodelist
    #Attributes to attach to each node: columns 3, 24, 26, and 27 of the school subset
    Attributes <- AHS_Community7[, c(3, 24, 26, 27)]

    #Every ID that appears as a sender or a target in the Edgelist, listed once.
    #The column is called ego_nid so that it can serve as the merge key.
    AHS_NodeList <- data.frame(ego_nid = unique(c(AHS_EdgeList[[1]], AHS_EdgeList[[2]])))
                                                  #Comparing the attributes file with the node list, we see 17 isolates.

    #Merging Attributes (merge() joins on the column names the two data sets share)
    AHS_NodeList <- merge(AHS_NodeList, Attributes)


#Step 6: Removing Non-essential data sets
rm(AHS_Edges, Attributes)

################################################
#   Constructing and Visualizing the Network   #
################################################

#Step 1: Storing the Sender and Target IDs (columns 1 and 2) as Character Strings before Building the Statnet Network Object
AHS_EdgeList[[1]] <- as.character(AHS_EdgeList[[1]])
AHS_EdgeList[[2]] <- as.character(AHS_EdgeList[[2]])

#Step 2: Creating a Network Object
#Note, this is a directed graph, so we declare it as such when the network object is created.
#Whether a graph is directed or undirected fundamentally changes how the relationships it describes are interpreted.
AHS_Network <- network(AHS_EdgeList, matrix.type = "edgelist", directed = TRUE)

#AHS_Graph=graph.data.frame(AHS_EdgeList, directed=TRUE)  Creating an igraph object for comparison

#Step 3: Calculating Network Measures to Create Network Attributes for Visualization Purposes, More on the Measures Soon
Eigen <- evcent(AHS_Network)                                   #Eigenvector centrality of each node
#In-degree of each node, multiplied by .15 so that high in-degree nodes do not crowd out the rest when used as a point size
InDegree <- degree(AHS_Network, cmode = "indegree") * .15

#Step 4: Creating Network Attributes
  #Specifying Colors for Gender and Race
  #sex == 2 is drawn in red; every other non-missing code is drawn in black.
  AHS_NodeList <- AHS_NodeList %>%
    mutate(Color_Female = ifelse(sex == 2, 'red', 'black'))

  #race5 codes 0-4 each get their own color; any other non-missing code falls back to 'gray0'.
  AHS_NodeList <- AHS_NodeList %>%
    mutate(Color_Race = ifelse(race5 == 0, 'gold',
                        ifelse(race5 == 1, 'chartreuse4',
                        ifelse(race5 == 2, 'blue1',
                        ifelse(race5 == 3, 'brown',
                        ifelse(race5 == 4, 'purple', 'gray0'))))))

  #Aligning the Nodelist with the Network's Vertex Order
  #The Nodelist rows come out of merge() sorted by ID, which need not be the order in which network() stored the vertices.
  #Each vertex is therefore looked up by name, so attribute i always describes vertex i.
  #Vertices that have no row in the Nodelist receive NA for every attribute.
  Vertex_Row <- match(as.character(network.vertex.names(AHS_Network)),
                      as.character(AHS_NodeList$ego_nid))

  #Creating Vectors to Assign as Attributes to the Network (one element per vertex, in vertex order)
  Gender <- as.vector(AHS_NodeList$sex[Vertex_Row])
  Race <- as.vector(AHS_NodeList$race5[Vertex_Row])
  Color_Race <- as.vector(AHS_NodeList$Color_Race[Vertex_Row])      #Important: 2d network Plots require a vector for an attribute
  Color_Female <- as.vector(AHS_NodeList$Color_Female[Vertex_Row])

  #Assigning Attributes to Vertices
  set.vertex.attribute(AHS_Network, "Gender", Gender)
  set.vertex.attribute(AHS_Network, "Race", Race)
  set.vertex.attribute(AHS_Network, "Color_Race", Color_Race)
  set.vertex.attribute(AHS_Network, "Color_Female", Color_Female)
  set.vertex.attribute(AHS_Network, "InDegree", InDegree)

#Step 5: Visualizing the Network
AHS_Network
summary(AHS_Network)                                        #Get numerical summaries of the network

#Fixing the seed so that the layout is the same every time the plot is drawn
set.seed(12345)
ggplot(data = ggnetwork(AHS_Network),
       mapping = aes(x = x, y = y, xend = xend, yend = yend)) +
    geom_edges(color = "lightgray") +
    geom_nodes(color = Color_Race, size = InDegree) +       #Nodes colored by race and sized by (scaled) in-degree
    #geom_nodelabel_repel (color = Race, label = Race) +#   For networks with fewer nodes, we might want to label
    geom_density_2d() +
    theme_blank()

############################
#   FUNDAMENTAL CONCEPTS   #
############################

#Node: An entity such as a social actor, firm, or organism. 
#Nodes can represent almost anything, as long as there is some meaningful set of relationships between the entities.

#Edge: A relationship between a pair of nodes where the relationship is nondirectional (e.g., kinship relationships or co-memberships in organizations).
#Arc: A directed relationship such as friendships. I can be friends with Jake, but Jake may not necessarily be my friend. Sad for me.

#Graph: A set of nodes and edges. The relationships are nondirectional and dichotomous (We are either kin or not.)
#Di-Graph: A set of nodes and arcs. The relationships are directional and can either be dichotomous or weighted. 

#Network: A graph or di-graph where the nodes have attributes assigned to them such as names, genders, or sizes. 

#Basic Measures
  #Network Size: The number of nodes; we also know this from the number of observations in the Nodelist
  network.size(x = AHS_Network)

  #Number of Edges: Corresponds to the number of observations in the Edgelist
  network.edgecount(x = AHS_Network)

  #Number of Dyads (Node Pairs)
  network.dyadcount(x = AHS_Network)

#############################
#   SYSTEM LEVEL MEASURES   #
#############################

#Density: The ratio of Observed Ties/All Possible Ties
gden(AHS_Network, mode = 'digraph')

#Degree Distribution
#Calculating In-Degree and Out-Degree to Visualize the Total Degree Distribution: What is the distribution of Connectiveness?
InDegree <- degree(AHS_Network, cmode="indegree")     #Computing the in-degree of each node
OutDegree <- degree(AHS_Network, cmode="outdegree")   #Computing the out-degree of each node

#par() returns the previous values of the settings it changes; keeping them lets us put back
#...both the margins and the panel layout once the histograms are drawn.
Old_Par <- par(mar = rep(2, 4), mfrow = c(2, 2))      # Narrow margins and a 2x2 display
hist(InDegree, xlab="Indegree", main="In-Degree Distribution", prob=FALSE)
hist(OutDegree, xlab="Outdegree", main="Out-Degree Distribution", prob=FALSE)
hist(InDegree+OutDegree, xlab="Total Degree", main="Total Degree Distribution", prob=FALSE)
par(Old_Par)                                          # Restore the previous margins and display

#Average Path Length
  #Walks: A walk is a sequence of nodes and ties, starting and ending with nodes, in which each node is incident with the edges
        #...following and preceding it in the sequence (Wasserman and Faust 1994, p. 105).
        # The beginning and ending node of a walk may be different, and both nodes and ties may be included more than once.
  #Paths: A path is a walk where all the nodes and all the ties are distinct.
  #A shortest path between two nodes is referred to as a geodesic (Wasserman and Faust 1994, p. 110)
  #Average path length, or the average geodesic distance, is the average number of steps along the shortest paths for all possible pairs of nodes.

# By default, nodes that cannot reach each other have a geodesic distance of infinity (Inf in R).
# The inf.replace argument substitutes another value for those Inf entries.
# Here we replace infinity values with 0 for visualization purposes.

AHS_Geo <- geodist(dat = AHS_Network, inf.replace = 0)
#AHS_Geo <- geodist(AHS_Network)                #Matrix with Infinity
AHS_Geo

#The length of the shortest path for all pairs of nodes.
AHS_Geo[["gdist"]]

#The number of shortest paths for all pairs of nodes.
AHS_Geo[["counts"]]

#Shortest Path Matrix
Geo_Dist <- AHS_Geo[["gdist"]]
hist(Geo_Dist)

#For non-zero paths, we see the distribution is approximately centered around 4.5.
#If we compare to iGraph's reported value of 4.496353, this seems reasonable.

#average.path.length(AHS_Graph, directed=TRUE, unconnected=TRUE)

#Global Clustering Coefficient: Transitivity
#Transitivity: A triad involving actors i, j, and k is transitive if whenever i --> j and j --> k then i --> k (Wasserman and Faust 1994, p. 243)
gtrans(dat = AHS_Network)
  #Weak and Weak Census
  #Weak transitivity is the most common understanding, the one reflected in Wasserman's and Faust's definition.
  #When 'weak' is specified as the measure, R returns the fraction of potentially intransitive triads obeying the weak condition:
  #Transitive Triads/(Transitive and Intransitive Triads).
  #In contrast, when 'weakcensus' is specified, R returns the count of transitive triads.
  gtrans(dat = AHS_Network, mode = "digraph", measure = "weak")
  gtrans(dat = AHS_Network, mode = "digraph", measure = "weakcensus")

#CUG (Conditional Uniform Graph) Tests: Is this Graph More Clustered than We Would Expect by Chance?
#See Wasserman and Faust 1994, p. 543-545 for more information.
#Note: These tests are somewhat computationally intensive.
    #Conducting these tests, we find that although the transitivity is higher than would be expected by chance given the network's size,
    #...it is not greater than would be expected given either the number of edges or dyads.

  #Test transitivity against size
  Cug_Size <- cug.test(dat = AHS_Network, FUN = gtrans, cmode = "size")
  plot(Cug_Size)

  #Test transitivity against density (conditioning on the number of edges)
  Cug_Edges <- cug.test(dat = AHS_Network, FUN = gtrans, cmode = "edges")
  plot(Cug_Edges)

  #Test transitivity against the dyad census
  Cug_Dyad <- cug.test(dat = AHS_Network, FUN = gtrans, cmode = "dyad.census")
  plot(Cug_Dyad)

###########################
#   MESO-LEVEL MEASURES   #
###########################

#Dyads
  #Null Dyads: Pairs of nodes with no arcs between them
  #Asymmetric Dyads: Pairs of nodes that have an arc between the two nodes going in one direction or the other, but not both
  #Mutual/Symmetric Dyads: Pairs of nodes that have arcs going to and from both nodes  <-->

#Number of Symmetric Dyads
mutuality(dat = AHS_Network)

#Dyadic Ratio: Ratio of Dyads where (i,j)==(j,i) to all Dyads
grecip(dat = AHS_Network, measure = "dyadic")

#Edgewise Ratio: Ratio of Reciprocated Edges to All Edges
grecip(dat = AHS_Network, measure = "edgewise")

#Directed Triad Census
#Ignoring direction, a triad can be in one of four states
  #Empty: A, B, C
  #An Edge: A -> B, C
  #A Star (2 Edges): A->B->C
  #Closed: A->B->C->A

#Triad types (per Davis & Leinhardt):
  #003  A, B, C, empty triad.
  #012  A->B, C
  #102  A<->B, C
  #021D A<-B->C
  #021U A->B<-C
  #021C A->B->C
  #111D A<->B<-C
  #111U A<->B->C
  #030T A->B<-C, A->C
  #030C A<-B<-C, A->C.
  #201  A<->B<->C.
  #120D A<-B->C, A<->C.
  #120U A->B<-C, A<->C.
  #120C A->B->C, A<->C.
  #210  A->B<->C, A<->C.
  #300  A<->B<->C, A<->C, completely connected.

triad.census(dat = AHS_Network)

#Hierarchy Measures: Components, Cut Points, K-Cores, and Cliques
  #Components: Components are maximally connected subgraphs (Wasserman and Faust 1994, p. 109).
  #Recall that community 7 has two large components and several small dyads and triads.
  #There are two types of components: strong and weak.
    #Strong components are connected through directed paths in both directions (i --> j, j --> i)
    #Weak components are connected through semi-paths, where direction is ignored (--> i <-- j --> k)
  components(dat = AHS_Network, connected = "strong")
  components(dat = AHS_Network, connected = "weak")

  #Which node belongs to which component?
  AHS_Comp <- component.dist(dat = AHS_Network, connected = "strong")
  AHS_Comp

  AHS_Comp[["membership"]] # The component each node belongs to
  AHS_Comp[["csize"]]      # The size of each component
  AHS_Comp[["cdist"]]      # The distribution of component sizes
  
  #Cut-Points and Cut-Sets: A cut-point is a node whose removal increases the number of components in the graph; the
  #...analogous line is called a bridge. A cut-set is a set of nodes (or lines) whose removal disconnects the graph (Harary 1969).
  #k refers to the number of nodes or lines that would need to be removed to reduce the graph to a disconnected state.
  cutpoints(AHS_Network, connected="strong")
  #return.indicator=TRUE gives a TRUE/FALSE value per node, so 2+indicator colors cut-points with color 3 and all other nodes with color 2.
  gplot(AHS_Network,vertex.col=2+cutpoints(AHS_Network,mode="graph",return.indicator=TRUE))
    #The plot only shows subgraphs consisting of nodes with a degree of 2 or more.
    #The green nodes indicate cut-points where the removal of the node would separate one subgraph from another.
  
    #Let's remove one of the cutpoints and count components again.
    AHS_Cut <- AHS_Network[-11,-11]
    #Negative indices drop elements: "-11" keeps every row/column except the 11th.
    #So, AHS_Cut is the adjacency matrix of AHS_Network with node 11 removed.
    #NOTE(review): confirm that node 11 is listed in the cutpoints() output above for your data.
    
    components(AHS_Cut, connected="strong")  #There are 74 strong components in AHS_Cut compared to 73 in AHS_Network
    
    #Bi-Components: Bi-Components refer to subgraphs that require at least the removal of two nodes or two lines to transform it into a 
    #...disconnected set of nodes. 
    #In large highly connected networks, we frequently analyze the properties of the largest bi-component to get a better understanding
    #...of the social system represented by the network.
    bicomponent.dist(AHS_Network) 
    
  #Identify Cohesive Subgroups
    #K-Cores: A k-core is a subgraph in which each node is adjacent to at least a minimum number, k, of the other nodes in the subgraph,
    #...while a k-plex specifies the acceptable number of lines that can be absent from each node (Wasserman and Faust 1994, p. 266).
  kcores(dat = AHS_Network)
  #Show the nesting of cores, this time based on in-degree; each core number gets its own rainbow color
  AHS_kc <- kcores(dat = AHS_Network, cmode = "indegree")
  gplot(
    AHS_Network,
    vertex.col = rainbow(max(AHS_kc) + 1)[AHS_kc + 1]
  )

  #Now, showing members of the 4-core only (nodes whose core number is greater than 3)
  gplot(
    AHS_Network[AHS_kc > 3, AHS_kc > 3],
    vertex.col = rainbow(max(AHS_kc) + 1)[AHS_kc[AHS_kc > 3] + 1]
  )
  
  #Cliques: A clique is a maximally complete subgraph of three or more nodes.
  #In other words, a clique consists of a subset of nodes, all of which are adjacent to each other, and where there are no other
  #...nodes that are also adjacent to all of the members of the clique (Luce and Perry 1949)

  #We treat the network as undirected here so that all ties between i and j are recovered.
  #The "directed" flag is switched off on the network object itself and stays off until it is switched back on.
  set.network.attribute(AHS_Network, "directed", FALSE)

  #The clique census returns a list with several important elements.
  #Let's assign that list to an object we'll call AHS_Cliques.
      #The clique.comembership parameter takes values "none" (no co-membership is computed),
      #"sum" (the total number of shared cliques for each pair of nodes is computed),
      #"bysize" (separate clique co-membership is computed for each clique size)

  AHS_Cliques <- clique.census(dat = AHS_Network, mode = "graph", clique.comembership = "sum")
  AHS_Cliques # an object that now contains the results of the clique census

      #The first element of the result list is clique.count: a matrix containing the number of cliques of different
      #...sizes (size = number of nodes in the clique).
      #The first column (named Agg) gives you the total number of cliques of each size,
      #The rest of the columns show the number of cliques each node participates in.

  #Note that this includes cliques of sizes 1 & 2. We have those when the largest fully connected structure includes just 1 or 2 nodes.
  AHS_Cliques[["clique.count"]]

  #The second element is the clique co-membership matrix:
  AHS_Cliques[["clique.comemb"]]

  # The third element of the clique census result is a list of all found cliques:
  # (Remember that a list can have another list as its element)
  AHS_Cliques[["cliques"]] # a full list of cliques, all sizes

  AHS_Cliques[["cliques"]][[1]] # cliques of size 1
  AHS_Cliques[["cliques"]][[2]] # cliques of size 2
  AHS_Cliques[["cliques"]][[3]] # cliques of size 3
  AHS_Cliques[["cliques"]][[4]] # cliques of size 4

###########################
#   NODE LEVEL MEASURES   #
###########################
  
#Restoring Our Directed Network
#The "directed" flag may have been switched off earlier for the clique census; this turns it back on.
set.network.attribute(AHS_Network, "directed", TRUE) 

#Reachability
#An actor is "reachable" by another if there exists any set of connections by which we can trace from the source to the target actor, 
#regardless of how many other nodes fall between them (Wasserman and Faust 1994, p. 132).
#If the network is a directed network, then it is possible for actor i to be able to reach actor j, but for j not to be able to reach i.
#We can classify how connected one node is to another by considering the types of paths connecting them.
  #Weakly Connected: The nodes are connected by a semi-path (--> i <--- j ---> k)
  #Unilaterally Connected: The nodes are connected by a path (i --> j --> k)
  #Strongly Connected: The nodes are connected by a path from i to k and a path from k to i.
  #Recursively Connected: The nodes are strongly connected, and the nodes along the path from i to k and from k to i are the same in reverse order.
    #e.g., i <--> j <--> k 
  
#Interpreting the reachability matrix, the first column indicates a specific node, the second an alter (alters can occur multiple times),
#and the third column indicates the number of paths connecting the two (total is a cumulative count of the number of paths in the network).
#For example, interpreting row 2, node 2 can reach node 235 through 235 paths (470-235), whereas in the middle of the list node 343 can reach node 1 through only 1 path.
#NOTE(review): this describes a three-column listing; check the help page below for the format your version returns.
reachability(AHS_Network) 
?reachability #For more information on this measure

#Degree Centrality: Total, In-Degree, Out-Degree
#The raw counts are kept in Raw_InDegree and Raw_OutDegree; InDegree, OutDegree, and TotalDegree hold rescaled
#...versions that are only meant to be used as node sizes in the plot below.
  
  #In-Degree Centrality: The number of nodes adjacent to node i (Wasserman and Faust 1994, p. 126). i <--
  Raw_InDegree <- degree(AHS_Network, cmode="indegree")
  InDegree <- Raw_InDegree * .15              #Scaling in-degree to avoid high in-degree nodes from crowding out the rest of the nodes
  
  set.vertex.attribute(AHS_Network, "InDegree", InDegree)
  
  #Out-Degree Centrality: The number of nodes adjacent from node i (Wasserman and Faust, p. 126). i -->
  Raw_OutDegree <- degree(AHS_Network, cmode="outdegree")
  OutDegree <- Raw_OutDegree * .5             #Scaling out-degree to avoid high out-degree nodes from crowding out the rest of the nodes
  
  set.vertex.attribute(AHS_Network, "OutDegree", OutDegree)
  
  #Total Degree Centrality: The Total Number of Adjacent Nodes (In-Degree + Out-Degree)
  #The sum is taken over the raw counts, not the rescaled sizes above, so that in-ties and out-ties count equally.
  TotalDegree <- (Raw_InDegree + Raw_OutDegree) * .4   #Scaling factor for plotting; adjust to taste
  
  set.vertex.attribute(AHS_Network, "TotalDegree", TotalDegree)
  
  #Try Sizing by the Different Degrees
  set.seed(12345)
  ggnetwork(AHS_Network) %>%
    ggplot(aes(x = x, y = y, xend = xend, yend = yend)) + 
    geom_edges(color = "lightgray") +
    geom_nodes(color = Color_Race, size = InDegree) +       
    #geom_nodelabel_repel (color = Race, label = Race) +#   For networks with fewer nodes, we might want to label
    theme_blank() + 
    geom_density_2d()

#Path Centralities: Closeness Centrality, Information Centrality, Betweenness Centrality

  #Closeness Centrality: Closeness centrality is based on the geodesic distances from node i to all other nodes.
  #Functionally, the measure ranges from 0 to 1, and is the inverse average distance between actor i and all other actors (Wasserman and Faust 1994, p. 185)
  #This measure does not work well when there are disconnected components because the distances between components cannot be summed, as
  #...they are technically infinite. There are several workarounds; see Acton and Jasny's alternative below.

  AHS_Closeness <- closeness(dat = AHS_Network, gmode = "digraph", cmode = "directed")
  AHS_Closeness
  hist(AHS_Closeness, xlab = "Closness", prob = TRUE)

  #Alternative closeness measure built from the geodesic distance matrix (from Acton's and Jasny's Statnet Tutorial).
  #Takes a network x and returns, for each vertex, the sum of 1/(geodesic distance) to every other vertex.
  #Unreachable pairs have an infinite distance, so they contribute 1/Inf = 0 to the sum.
  Closeness <- function(x){
    inverse_dist <- 1 / geodist(x)$gdist   # Matrix of reciprocal geodesic distances
    diag(inverse_dist) <- 0                # A vertex's distance to itself is 0 (reciprocal Inf); count self-ties as 0
    rowSums(inverse_dist)                  # One total per vertex (row)
  }
  
  #Applying the function. The result is stored under its own name so that the Closeness() function
  #...is not overwritten by its output and this line can be run more than once.
  AHS_AltCloseness <- Closeness(AHS_Network)
  AHS_AltCloseness
  hist(AHS_AltCloseness, xlab="Alt. Closeness", prob=TRUE)    #Better behaved!
  
  #Information Centrality: Information Centrality measures the information flowing from node i.
  #In general, actors with higher information centrality are predicted to have greater control over the flow of information within a network.
  #Highly information-central individuals tend to have a large number of short paths to many others within the social structure.
  ?infocent  #For more information
  
  AHS_Info <- infocent(AHS_Network, rescale=TRUE)
  AHS_Info
  hist(AHS_Info , xlab="Information Centrality", prob=TRUE) 
  
  gplot(AHS_Network, vertex.cex=(AHS_Info)*250, gmode="graph") # Use w/gplot
  #As suggested by the histogram there is relatively little variation in information centrality in this graph.
  
  #Betweenness Centrality: The basic intuition behind Betweenness Centrality is that the actor between all the other actors in the 
  #...network has some control over the paths in the network. 
  #Functionally, Betweenness Centrality is the ratio of the sum of all shortest paths linking j and k that includes node i over 
  #...all the shortest paths linking j and k (Wasserman and Faust 1994, p. 191)
  
  AHS_Betweenness <- betweenness(AHS_Network, gmode="digraph")  
  AHS_Betweenness
  hist(AHS_Betweenness , xlab="Betweenness Centrality", prob=TRUE) 
  
  #Node sizes use the square root of betweenness (divided by 25) so that a few very large values do not dominate the plot
  gplot(AHS_Network, vertex.cex=sqrt(AHS_Betweenness)/25, gmode="digraph") 
  
  #Comparing Closeness and Betweenness Centralities
  cor(AHS_AltCloseness, AHS_Betweenness)                      #Correlate our adjusted measure of closeness with betweenness
  plot(AHS_AltCloseness, AHS_Betweenness,                     #Plot the bivariate relationship
       xlab="Alt. Closeness", ylab="Betweenness Centrality")
  
#Measures of Power in Influence Networks: Bonacich and Eigenvector Centrality

  #Bonacich Centrality: The intuition behind Bonacich Power Centrality is that the power of node i is recursively defined
  #...by the sum of the power of its alters.
  #The nature of the recursion involved is then controlled by the power exponent: positive values imply that vertices become
  #...more powerful as their alters become more powerful (as occurs in cooperative relations), while negative values imply
  #...that vertices become more powerful only as their alters become weaker (as occurs in competitive or antagonistic relations).
  ?bonpow   #For more information about the measure

  #Eigenvector Centrality: Conceptually, the logic behind eigenvector centrality is that node i's influence is proportional
  #...to the centralities of the nodes adjacent to node i. In other words, we are important because we know highly connected people.
  #Mathematically, we capture this concept by calculating the values of the first eigenvector of the graph's adjacency matrix.
  ?evcent   #For more information.

  AHS_Eigen <- evcent(dat = AHS_Network)
  AHS_Eigen
  hist(AHS_Eigen, xlab = "Eigenvector Centrality", prob = TRUE)

  #Node sizes are ten times each node's eigenvector centrality
  gplot(AHS_Network, vertex.cex = AHS_Eigen * 10, gmode = "digraph")

###########################
#   POSITIONAL ANALYSIS   #
###########################

#Burt's (1992) measures of structural holes are supported by iGraph and ego network variants of these measures are supported by egonet
#...the egonet package is compatible with the sna package.
  
#You can find descriptions and code to run Burt's measures in igraph at: http://igraph.org/r/doc/constraint.html
  
  #Brokerage: The brokerage measure included in the SNA package builds on past work on brokerage (Marsden 1982), but is a more 
  #...explicitly group oriented measure. Unlike Burt's (1992) measure, the Gould-Fernandez measure requires specifying a group variable
  #...based on an attribute. I use race in the example below.
  
    #Brokerage Roles: Group-Based Concept
    #w_I: Coordinator Role (Mediates Within Group Contact)
    #w_O: Itinerant Broker Role (Mediates Contact between Individuals in a group to which the actor does not belong)
    #b_{IO}: Representative: (Mediates incoming contact from out-group members)
    #b_{OI}: Gatekeeper: (Mediates outgoing contact from in-group members)
    #b_O: Liaison Role: (Mediates contact between individuals of two different groups, neither of which the actor belongs)
    #t: Total or Cumulative Brokerage (Any of the above paths)
  ?brokerage   #for more information
  
  #Race must hold one group label per vertex, in the same order as the vertices of AHS_Network.
  AHS_Brokerage <- brokerage(AHS_Network, Race)
  AHS_Brokerage
  
  #Cumulative brokerage is the "t" column of the observed brokerage scores (raw.nli), one row per node.
  #Note that AHS_Brokerage$cl is NOT a brokerage score: it holds the group memberships that were passed in.
  AHS_CBrokerage <- AHS_Brokerage$raw.nli[, "t"]
  hist(AHS_CBrokerage, xlab="Cumulative Brokerage", prob=TRUE) 
  
  #Rescaling to node sizes between 0.5 and 3 so the plot stays readable however large the raw counts are.
  #If no node brokers anything, every node is drawn at the minimum size.
  Max_Brokerage <- max(AHS_CBrokerage, na.rm = TRUE)
  if (Max_Brokerage > 0) {
    Brokerage_Size <- 0.5 + 2.5 * AHS_CBrokerage / Max_Brokerage
  } else {
    Brokerage_Size <- rep(0.5, length(AHS_CBrokerage))
  }
  gplot(AHS_Network, vertex.cex=Brokerage_Size, gmode="digraph") 

#Structural Equivalence
  #Structural equivalence: Similarity/Distance Measures Include:
    #Correlation
    #Euclidean Distance
    #Hamming Distance
    #Gamma Correlation
  sedist(dat = AHS_Network, mode = "digraph", method = "correlation")

  #Cluster based on structural equivalence, labeling the leaves with the vertex names:
  AHS_Clustering <- equiv.clust(
    dat = AHS_Network,
    mode = "digraph",
    plabels = network.vertex.names(AHS_Network)
  )
  AHS_Clustering                        #Specification of the equivalence method used
  plot(AHS_Clustering)                  #Plot the dendrogram
  rect.hclust(AHS_Clustering[["cluster"]], h = 30)   #Box the clusters obtained by cutting the tree at height 30

  #Generating a Block Model based on the Structural Equivalence Clustering (same cut height as the boxes above)
  AHS_BM <- blockmodel(dat = AHS_Network, ec = AHS_Clustering, h = 30)
  AHS_BM

  #Extract the block image for Visualization
  bimage <- AHS_BM[["block.model"]]
  bimage
  bimage[is.nan(bimage)] <- 1           #Cells that came out as NaN are set to 1

  #Visualizing the block image (with self-reflexive ties)
  #Edge widths follow the block image values, node sizes the square root of block size, and node shade the diagonal values.
  gplot(
    bimage,
    diag = TRUE,
    edge.lwd = bimage * 5,
    vertex.cex = sqrt(table(AHS_BM[["block.membership"]])) / 2,
    gmode = "graph",
    vertex.sides = 50,
    vertex.col = gray(1 - diag(bimage))
  )