#DESCRIPTIVE STATS | Duke Network Analysis Center

#Download R source file

              #Social Networks and Health Training Program
#Descriptive Network Analyses
#Jonathan H. Morgan and Molly Copeland

#RESOURCES
  #Acton's and Jasny's Statnet Tutorial: https://statnet.org/trac/raw-attachment/wiki/Resources/introToSNAinR_sunbelt_2012_tutorial.pdf
  #Wasserman and Faust's (1994) book, Social Network Analysis: Methods and Applications

#Clearing Old Data: remove every visible object from the workspace, then run the garbage collector
rm(list = ls(all.names = FALSE))
gc()

########################
#   LOADING PACKAGES   #
########################

library (plyr)
library (dplyr)
library (tidyr)
library(statnet)
library(ggplot2)
library(ggnetwork)
#library (igraph)                                 igraph and sna packages are not compatible. Run one or the other.

#Opening the package-level help indexes for sna and statnet.
#The statnet package draws on the sna package to compute the majority of its descriptive network statistics.
help(package = "sna")
help(package = "statnet")

######################
#   IMPORTING DATA   #
######################

#A file-chooser dialog opens (look for its icon in your task bar); select today's data set: ahs_wpvar.csv
#An interactive chooser is used because participants may have varying levels of familiarity with the computer they are using.
AHS_WPVAR <- read.csv(file = file.choose(), header = TRUE)

#################################################
#   Creating School 7's Edgelist and Nodelist   # 
#################################################

#############################
#   CREATING THE EDGELIST   #
#############################

#Building community 7's directed edgelist in a single pipeline:
#  1. keep the ego ID, the ten alter-ID columns (mfnid_1-5, ffnid_1-5), and the community ID
#  2. keep only the rows belonging to community 7
#  3. stack the alter-ID columns into long format; na.rm = TRUE drops the NA entries
#  4. drop the 99999 code, which the gather step does not remove
#  5. keep only ego and alter, renamed Sender and Target to indicate directionality
AHS_Edges <- AHS_WPVAR %>%
  select(ego_nid, mfnid_1:mfnid_5, ffnid_1:ffnid_5, commcnt) %>%
  filter(commcnt == 7) %>%
  gather(Alter_Label, value,
         mfnid_1:mfnid_5, ffnid_1:ffnid_5,
         na.rm = TRUE) %>%
  filter(value != 99999) %>%        #We go from 2,659 edges to 2,099 edges
  select(Sender = ego_nid,          #Dropping the now redundant label and community columns
         Target = value)

####################################################
#   CREATING NODELIST AND SEQUENTIAL NUMERIC IDs   #
####################################################

#Creating a comprehensive nodelist with sequential numeric IDs
#  Step 1: Stack the Sender and Target columns so every ID that appears anywhere in the edgelist is listed
#  Step 2: Keep only the stacked ID column, named ego_nid so attributes can be merged in later
#  Step 3: Drop duplicates (distinct keeps the first appearance of each ID)
#  Step 4: Number the rows 1..N, because gaps in the original ID sequence can cause errors.
#          row_number() replaces the deprecated add_rownames(); it numbers rows by position
#          rather than relying on the data frame's row-name strings.
#          as_tibble() keeps the result a tibble, as add_rownames() returned.
#The result has two columns, in this order: Sender_ID (numeric), ego_nid.
AHS_Nodes <- AHS_Edges %>%
  gather(Alter_Label, value, Sender, Target,
         na.rm = TRUE) %>%
  select(ego_nid = value) %>%
  distinct(ego_nid) %>%
  as_tibble() %>%
  transmute(Sender_ID = as.numeric(row_number()),
            ego_nid)

#Step 6: Merging/Joining Numeric IDs into the Edgelist
#The nodelist is joined twice: once keyed on Sender (adding Sender_ID) and once keyed on
#Target (adding Target_ID). The nodelist's columns are renamed inline for each join, so
#AHS_Nodes itself is not modified until the final tidy-up.
AHS_Edges <- AHS_Edges %>%
  left_join(rename(AHS_Nodes, Sender = ego_nid),
            by = "Sender") %>%
  left_join(rename(AHS_Nodes, Target = ego_nid, Target_ID = Sender_ID),
            by = "Target") %>%
  #Step 7: Tidying up. The original labels stay in the nodelist, so the edgelist keeps
  #only the sequential IDs (avoiding errors caused by gaps in the original IDs).
  select(Sender_ID, Target_ID) %>%
  rename(Sender = Sender_ID,
         Target = Target_ID)

#The nodelist ends with the columns ID (sequential) and ego_nid (original label)
AHS_Nodes <- rename(AHS_Nodes, ID = Sender_ID)

##########################
#   MERGING ATTRIBUTES   #
##########################

#Step 1-2: Keeping community 7's rows and the variables of interest
AHS_Attributes <- AHS_WPVAR %>%
  filter(commcnt == 7) %>%
  select(commcnt, ego_nid, sex, grade, race5)

#Step 3: Merging/Joining Attributes with the Nodes File
#A left join keeps exactly the vertices (nodes) that appear in our network;
#nodes without a matching attribute row receive NA.
AHS_Nodes <- left_join(AHS_Nodes, AHS_Attributes, by = "ego_nid")

#Step 4: Tidying: Removing Non-essential data sets
rm(AHS_Attributes)

#Writing the edgelist and nodelist to the working directory
save(AHS_Edges, file = "AHS_Edges.Rda")
save(AHS_Nodes, file = "AHS_Nodes.Rda")

################################################
#   Constructing and Visualizing the Network   #
################################################

#Step 1: Formatting Sender and Target Variables to Construct a Statnet Network Object
AHS_Edges[, 1] <- as.character(AHS_Edges[, 1])
AHS_Edges[, 2] <- as.character(AHS_Edges[, 2])

#Step 2: Creating a Network Object
#Note, this is a directed graph. So, we specify that in the network object now.
#The specification of the graph as either directed or undirected is important because it
#impacts fundamentally how we interpret the relationships described by the graph.
AHS_Network <- network(AHS_Edges, matrix.type = "edgelist", directed = TRUE)

AHS_Network

#Step 3: Calculating Network Measures to Create Network Attributes for Visualization Purposes, More on the Measures Soon
Eigen <- evcent(AHS_Network)                          #Computing the eigenvector centrality of each node
InDegree <- degree(AHS_Network, cmode = "indegree")   #Computing the in-degree of each node
InDegree <- InDegree * .15                            #Scaling in-degree to avoid high in-degree nodes from crowding out the rest of the nodes

#Step 4: Creating Network Attributes
  #Specifying Colors for Gender and Race
  #sex == 2 is drawn red, every other code black; race5 codes 0-4 get their own colour and
  #any other code gets gray0. A missing sex or race5 value yields an NA colour.
  AHS_Nodes <- AHS_Nodes %>%
    mutate(Color_Female = ifelse(sex == 2, "red", "black"),
           Color_Race = ifelse(race5 == 0, "gold",
                        ifelse(race5 == 1, "chartreuse4",
                        ifelse(race5 == 2, "blue1",
                        ifelse(race5 == 3, "brown",
                        ifelse(race5 == 4, "purple", "gray0"))))))

  #Aligning the nodelist with the network's vertex order.
  #The vertex names are the (character) sequential IDs from the edgelist, but the order in
  #which network() stores them is not guaranteed to equal the nodelist's row order.
  #Matching on the ID makes every attribute vector line up with its vertex either way.
  Vertex_Rows <- match(network.vertex.names(AHS_Network), as.character(AHS_Nodes$ID))
  stopifnot(!anyNA(Vertex_Rows))

  #Creating Vectors (in vertex order) to Assign as Attributes to the Network
  Gender <- as.vector(AHS_Nodes$sex[Vertex_Rows])
  Race <- as.vector(AHS_Nodes$race5[Vertex_Rows])
  Color_Race <- as.vector(AHS_Nodes$Color_Race[Vertex_Rows])     #Important: 2d network Plots require a vector for an attribute
  Color_Female <- as.vector(AHS_Nodes$Color_Female[Vertex_Rows])

  #Assigning Attributes to Vertices
  set.vertex.attribute(AHS_Network, "Gender", Gender)
  set.vertex.attribute(AHS_Network, "Race", Race)
  set.vertex.attribute(AHS_Network, "Color_Race", Color_Race)
  set.vertex.attribute(AHS_Network, "Color_Female", Color_Female)
  set.vertex.attribute(AHS_Network, "InDegree", InDegree)

#Step 5: Visualizing the Network
AHS_Network
summary(AHS_Network)                                        #Get numerical summaries of the network

set.seed(12345)                                             #Fixing the seed so the layout is reproducible
ggnetwork(AHS_Network) %>%
    ggplot(aes(x = x, y = y, xend = xend, yend = yend)) +
    geom_edges(color = "lightgray") +
    geom_nodes(color = Color_Race, size = InDegree) +
    #geom_nodelabel_repel (color = Race, label = Race) +#   For networks with fewer nodes, we might want to label
    theme_blank() +
    geom_density_2d()
############################
#   FUNDAMENTAL CONCEPTS   #
############################

#Node: An entity such as a social actor, firm, or organism. 
#Nodes can represent almost anything, as long as there is some meaningful set of relationships between the entities.

#Edge: A relationship between a pair of nodes where the relationship is nondirectional (e.g., kinship relationships or co-memberships in organizations).
#Arc: A directed relationship such as friendships. I can be friends with Joe, but Joe may not necessarily be my friend. Sad for me.

#Graph: A set of nodes and edges. The relationships are nondirectional and dichotomous (We are either kin or not.)
#Di-Graph: A set of nodes and arcs. The relationships are directional and can either be dichotomous or weighted. 

#Network: A graph or di-graph where the nodes have attributes assigned to them such as names, genders, or sizes. 

#Basic Measures
  #Network Size: the number of vertices; this should equal the number of observations in the nodelist
  network.size(AHS_Network)

  #Number of Edges: this should correspond to the number of observations in the edgelist
  network.edgecount(AHS_Network)

  #Number of Dyads (Node Pairs)
  network.dyadcount(AHS_Network)

#############################
#   SYSTEM LEVEL MEASURES   #
#############################

#Density: The ratio of Observed Ties/All Possible Ties
gden(AHS_Network, mode = "digraph")

#Degree Distribution
#Calculating In-Degree and Out-Degree to Visualize the Total Degree Distribution: What is the distribution of Connectiveness?
InDegree <- degree(AHS_Network, cmode = "indegree")     #Computing the in-degree of each node (unscaled)
OutDegree <- degree(AHS_Network, cmode = "outdegree")   #Computing the out-degree of each node (unscaled)

#Setting tight margins and a 2x2 display for the three histograms.
#par() returns the previous values of the settings it changes; they are restored afterwards
#so that later plots do not inherit the 2-line margins (which would clip their axis labels).
Old_Par <- par(mar = rep(2, 4), mfrow = c(2, 2))
hist(InDegree, xlab = "Indegree", main = "In-Degree Distribution", prob = FALSE)
hist(OutDegree, xlab = "Outdegree", main = "Out-Degree Distribution", prob = FALSE)
hist(InDegree + OutDegree, xlab = "Total Degree", main = "Total Degree Distribution", prob = FALSE)
par(Old_Par) # Restore display

#Average Path Length
  #Walks: A walk is a sequence of nodes and ties, starting and ending with nodes, in which each node is incident with the edges
        #...following and preceding it in the sequence (Wasserman and Faust 1994, p. 105).
        # The beginning and ending node of a walk may be different, some nodes may be included more than once, and some ties may be included more than once.
  #Paths: A path is a walk where all the nodes and all the ties are distinct.
  #A shortest path between two nodes is referred to as a geodesic (Wasserman and Faust 1994, p. 110)
  #Average path length or the geodesic distance is the average number of steps along the shortest paths for all possible pairs of nodes.

# By default, nodes that cannot reach each other have a geodesic distance of infinity (Inf).
# inf.replace = 0 substitutes 0 for those infinite distances, for visualization purposes.
AHS_Geo <- geodist(AHS_Network, inf.replace = 0)
#AHS_Geo <- geodist(AHS_Network)                #Matrix with Infinity
AHS_Geo

#The length of the shortest path for all pairs of nodes.
AHS_Geo[["gdist"]]

#The number of shortest paths for all pairs of nodes.
AHS_Geo[["counts"]]

#Shortest Path Matrix
Geo_Dist <- AHS_Geo[["gdist"]]
hist(Geo_Dist)

#For non-zero paths, we see the distribution is approximately centered around 4.5.
#If we compare to iGraph's reported value of 4.496353, this seems reasonable.

#average.path.length(AHS_Graph, directed=TRUE, unconnected=TRUE)

#Global Clustering Coefficient: Transitivity
#Transitivity: A triad involving actors i, j, and k is transitive if whenever i --> j and j --> k then i --> k (Wasserman and Faust 1994, p. 243)
gtrans(AHS_Network)
  #Weak and Weak Census
  #Weak transitivity is the most common understanding, the one reflected in Wasserman's and Faust's definition.
  #measure = "weak" returns the fraction of potentially intransitive triads obeying the weak condition:
  #Transitive Triads / (Transitive + Intransitive Triads).
  #measure = "weakcensus" returns the count of transitive triads instead.
  gtrans(AHS_Network, mode = "digraph", measure = "weak")
  gtrans(AHS_Network, mode = "digraph", measure = "weakcensus")

#CUG (Conditional Uniform Graph) Tests: Is this Graph More Clustered than We Would Expect by Chance?
#See Wasserman and Faust 1994, p. 543-545 for more information.
#Note: These tests are somewhat computationally intensive.
    #Conducting these tests, the authors report that although the transitivity is higher than would be expected by chance
    #...given the network's size, it is not greater than would be expected given either the number of edges or dyads.

  #Test transitivity conditioning on size
  Cug_Size <- cug.test(AHS_Network, gtrans, cmode = "size")
  plot(Cug_Size)

  #Test transitivity conditioning on the number of edges (density)
  Cug_Edges <- cug.test(AHS_Network, gtrans, cmode = "edges")
  plot(Cug_Edges)

  #Test transitivity conditioning on the Dyad Census
  Cug_Dyad <- cug.test(AHS_Network, gtrans, cmode = "dyad.census")
  plot(Cug_Dyad)

###########################
#   MESO-LEVEL MEASURES   #
###########################

#Dyads
  #Null Dyads: Pairs of nodes with no arcs between them
  #Asymmetric Dyads: Pairs of nodes with an arc in one direction or the other, but not both
  #Mutual/Symmetric Dyads: Pairs of nodes with arcs going in both directions  <-->

#Number of Symmetric Dyads
mutuality(AHS_Network)

#Dyadic Ratio: Ratio of Dyads where (i,j)==(j,i) to all Dyads
grecip(AHS_Network, measure = "dyadic")

#Edgewise Ratio: Ratio of Reciprocated Edges to All Edges
grecip(AHS_Network, measure = "edgewise")

#Directed Triad Census
#Triads can be in Four States
  #Empty: A, B, C
  #An Edge: A -> B, C
  #A Star (2 Edges): A->B->C
  #Closed: A->B->C->A

#Triad types (per Davis & Leinhardt):
  #003  A, B, C, empty triad.
  #012  A->B, C
  #102  A<->B, C
  #021D A<-B->C
  #021U A->B<-C
  #021C A->B->C
  #111D A<->B<-C
  #111U A<->B->C
  #030T A->B<-C, A->C
  #030C A<-B<-C, A->C.
  #201  A<->B<->C.
  #120D A<-B->C, A<->C.
  #120U A->B<-C, A<->C.
  #120C A->B->C, A<->C.
  #210  A->B<->C, A<->C.
  #300  A<->B<->C, A<->C, completely connected.

#Counting how many triads of each type occur in the network
triad.census(AHS_Network)

#Hierarchy Measures: Components, Cut Points, K-Cores, and Cliques
  #Components: Components are maximally connected subgraphs (Wasserman and Faust 1994, p. 109).
  #Recall that community 7 has two large components and several small dyads and triads.
  #There are two types of components: strong and weak.
    #Strong components are components connected through directed paths (i --> j, j --> i)
    #Weak components are components connected through semi-paths (--> i <-- j --> k)
  components(AHS_Network, connected = "strong")
  components(AHS_Network, connected = "weak")

  #Which node belongs to which component?
  AHS_Comp <- component.dist(AHS_Network, connected = "strong")

  AHS_Comp$membership # The component each node belongs to
  AHS_Comp$csize      # The size of each component
  AHS_Comp$cdist      # The distribution of component sizes

  #Cut-Points and Cut-Sets (Harary 1969): a cut-point is a node whose removal disconnects part of the graph;
  #...a cut-set is a set of such elements whose joint removal disconnects it.
  #k refers to the number of nodes or lines that would need to be removed to reduce the graph to a disconnected state.
  cutpoints(AHS_Network, connected = "strong")
  #return.indicator = TRUE yields one logical per vertex; adding 2 maps FALSE/TRUE to colours 2 and 3.
  gplot(AHS_Network, vertex.col = 2 + cutpoints(AHS_Network, mode = "graph", return.indicator = TRUE))
    #The plot only shows subgraphs consisting of nodes with a degree of 2 or more.
    #The green nodes indicate cut-points where the removal of the node would separate one subgraph from another.

    #Let's remove one node and count components again.
    AHS_Cut <- AHS_Network[-11, -11]
    #"-11" drops the 11th row and the 11th column.
    #So, AHS_Cut is the adjacency matrix of AHS_Network with vertex 11 removed.
    #NOTE(review): confirm against the cutpoints() output above that vertex 11 is in fact a cut-point.

    components(AHS_Cut, connected = "strong")  #The authors report 74 strong components in AHS_Cut compared to 73 in AHS_Network

    #Bi-Components: Bi-Components refer to subgraphs that require at least the removal of two nodes or two lines to transform it into a
    #...disconnected set of nodes.
    #In large highly connected networks, we frequently analyze the properties of the largest bi-component to get a better understanding
    #...of the social system represented by the network.
    bicomponent.dist(AHS_Network)
    
  #Identify Cohesive Subgroups
    #K-Cores: A k-core is a subgraph in which each node is adjacent to at least a minimum number, k, of the other nodes in the subgraph,
    #...while a k-plex specifies the acceptable number of lines that can be absent from each node (Wasserman and Faust 1994, p. 266).
  kcores(AHS_Network)

  #Show the nesting of cores, this time based on in-degree
  AHS_kc <- kcores(AHS_Network, cmode = "indegree")
  #One rainbow colour per core level; the +1 shifts core 0 to the first palette entry
  gplot(AHS_Network, vertex.col = rainbow(max(AHS_kc) + 1)[AHS_kc + 1])

  #Now, showing only the nodes whose core number exceeds 3 (the 4-core and above)
  gplot(AHS_Network[AHS_kc > 3, AHS_kc > 3],
        vertex.col = rainbow(max(AHS_kc) + 1)[AHS_kc[AHS_kc > 3] + 1])
  
  #Cliques: A clique is a maximally complete subgraph of three or more nodes.
  #In other words, a clique consists of a subset of nodes, all of which are adjacent to each other, and where there are no other
  #...nodes that are also adjacent to all of the members of the clique (Luce and Perry 1949)

  #Flagging the network as undirected so that all ties between i and j are treated symmetrically.
  #(The flag is switched back to directed at the start of the node-level section.)
  set.network.attribute(AHS_Network, "directed", FALSE)

  #The clique census returns a list with several important elements.
  #We store that list in an object called AHS_Cliques.
      #The clique.comembership parameter takes values "none" (no co-membership is computed),
      #"sum" (the total number of shared cliques for each pair of nodes is computed),
      #"bysize" (separate clique co-membership is computed for each clique size)
  AHS_Cliques <- clique.census(AHS_Network, mode = "graph", clique.comembership = "sum")
  AHS_Cliques # an object that now contains the results of the clique census

      #clique.count: a matrix containing the number of cliques of different
      #...sizes (size = number of nodes in the clique).
      #The first column (named Agg) gives the total number of cliques of each size;
      #the remaining columns show the number of cliques each node participates in.

  #Note that this includes cliques of sizes 1 & 2. We have those when the largest fully connected structure includes just 1 or 2 nodes.
  AHS_Cliques[["clique.count"]]

  #The clique co-membership matrix:
  AHS_Cliques[["clique.comemb"]]

  #The list of all cliques found, grouped by size
  #(Remember that a list can have another list as its element)
  AHS_Cliques[["cliques"]]      # a full list of cliques, all sizes

  AHS_Cliques[["cliques"]][[1]] # cliques size 1
  AHS_Cliques[["cliques"]][[2]] # cliques of size 2
  AHS_Cliques[["cliques"]][[3]] # cliques of size 3
  AHS_Cliques[["cliques"]][[4]] # cliques of size 4

###########################
#   NODE LEVEL MEASURES   #
###########################
  
#Restoring Our Directed Network (the clique census above flagged it as undirected)
set.network.attribute(AHS_Network, "directed", TRUE)

#Reachability
#An actor is "reachable" by another if there exists any set of connections by which we can trace from the source to the target actor,
#regardless of how many other nodes fall between them (Wasserman and Faust 1994, p. 132).
#If the network is a directed network, then it is possible for actor i to be able to reach actor j, but for j not to be able to reach i.
#We can classify how connected one node is to another by considering the types of paths connecting them.
  #Weakly Connected: The nodes are connected by a semi-path (--> i <--- j ---> k)
  #Unilaterally Connected: The nodes are connected by a path (i --> j --> k)
  #Strongly Connected: The nodes are connected by a path from i to k and a path from k to i.
  #Recursively Connected: The nodes are strongly connected, and the nodes along the path from i to k and from k to i are the same in reverse order.
    #e.g., i <--> j <--> k

#Interpreting the reachability matrix, the first column indicates a specific node, the second an alter (alters can occur multiple times),
#and the third column indicates the number of paths connecting the two (total is a cumulative count of the number of paths in the network).
#For example, interpreting row 2, node 2 can reach node 235 through 235 paths (470-235), whereas in the middle of the list node 343 can reach node 1 through only 1 path.
reachability(AHS_Network)
??reachability #Searching the help system for more information on this measure (search term spelling corrected)

#Degree Centrality: Total, In-Degree, Out-Degree
#NOTE: InDegree, OutDegree and TotalDegree below are rescaled for use as plotting sizes;
#they are no longer raw degree counts after the multiplication.

  #In-Degree Centrality: The number of nodes adjacent to node i (Wasserman and Faust 1994, p. 126). i <--
  InDegree <- degree(AHS_Network, cmode = "indegree")
  InDegree <- InDegree * .15                #Scaling in-degree to avoid high in-degree nodes from crowding out the rest of the nodes

  set.vertex.attribute(AHS_Network, "InDegree", InDegree)

  #Out-Degree Centrality: The number of nodes adjacent from node i (Wasserman and Faust, p. 126). i -->
  OutDegree <- degree(AHS_Network, cmode = "outdegree")
  OutDegree <- OutDegree * .5               #Scaling out-degree to avoid high out-degree nodes from crowding out the rest of the nodes

  set.vertex.attribute(AHS_Network, "OutDegree", OutDegree)

  #Total Degree Centrality: The Total Number of Adjacent Nodes (In-Degree + Out-Degree)
  #The sum is taken over the unscaled degrees; adding the already-rescaled InDegree and
  #OutDegree would weight in-ties by .15 and out-ties by .5 instead of counting both equally.
  TotalDegree <- degree(AHS_Network, cmode = "indegree") +
    degree(AHS_Network, cmode = "outdegree")
  TotalDegree <- TotalDegree * .4           #Scaling total degree for plotting

  set.vertex.attribute(AHS_Network, "TotalDegree", TotalDegree)

  #Try Sizing by the Different Degrees
  set.seed(12345)
  ggnetwork(AHS_Network) %>%
    ggplot(aes(x = x, y = y, xend = xend, yend = yend)) +
    geom_edges(color = "lightgray") +
    geom_nodes(color = Color_Race, size = InDegree) +
    #geom_nodelabel_repel (color = Race, label = Race) +#   For networks with fewer nodes, we might want to label
    theme_blank() +
    geom_density_2d()

#Path Centralities: Closeness Centrality, Information Centrality, Betweenness Centrality

  #Closeness Centrality: Closeness centrality measures the geodesic distances of node i to all other nodes.
  #Functionally, this measure ranges from 0 to 1, and is the inverse average distance between actor i and all other actors (Wasserman and Faust 1994, p. 185)
  #This measure does not work well when there are disconnected components because the distances between components cannot be summed as
  #...they are technically infinite. There are several workarounds; see Acton and Jasny's alternative below.

  AHS_Closeness <- closeness(AHS_Network, gmode = "digraph", cmode = "directed")
  AHS_Closeness
  hist(AHS_Closeness, xlab = "Closness", prob = TRUE)

  #Alternative closeness, computed from the geodesic distance matrix (from Acton's and Jasny's Statnet Tutorial)
  #x: a graph accepted by geodist()
  #Returns one value per vertex: the sum of 1/distance to every other vertex.
  #Unreachable vertices (distance Inf) contribute 1/Inf = 0, so the sum stays finite.
  Closeness <- function(x){
    inv_dist <- 1 / geodist(x)$gdist   # element-wise inverse of the geodesic distances
    diag(inv_dist) <- 0                # a vertex contributes nothing to its own score
    rowSums(inv_dist)                  # total inverse distance from each vertex
  }
  
  #Applying the alternative closeness function.
  #The result is stored as Alt_Closeness rather than Closeness: assigning it to Closeness would
  #overwrite the function with its own output, so this line could not be run a second time.
  Alt_Closeness <- Closeness(AHS_Network)
  Alt_Closeness
  hist(Alt_Closeness, main = "Histogram of Closeness", xlab="Alt. Closeness", prob=TRUE)   #Better behaved!

  #Information Centrality: Information Centrality measures the information flowing from node i.
  #In general, actors with higher information centrality are predicted to have greater control over the flow of information within a network.
  #Highly information-central individuals tend to have a large number of short paths to many others within the social structure.
  ?infocent  #For more information

  AHS_Info <- infocent(AHS_Network, rescale=TRUE)
  AHS_Info
  hist(AHS_Info , xlab="Information Centrality", prob=TRUE)

  gplot(AHS_Network, vertex.cex=(AHS_Info)*250, gmode="graph") # Use w/gplot
  #As suggested by the histogram there is relatively little variation in information centrality in this graph.

  #Betweenness Centrality: The basic intuition behind Betweenness Centrality is that the actor between all the other actors in the
  #...network has some control over the paths in the network.
  #Functionally, Betweenness Centrality is the ratio of the sum of all shortest paths linking j and k that includes node i over
  #...all the shortest paths linking j and k (Wasserman and Faust 1994, p. 191)

  AHS_Betweenness <- betweenness(AHS_Network, gmode="digraph")
  AHS_Betweenness
  hist(AHS_Betweenness , xlab="Betweenness Centrality", prob=TRUE)

  #Square-root scaling keeps the highest-betweenness vertices from dominating the plot
  gplot(AHS_Network, vertex.cex=sqrt(AHS_Betweenness)/25, gmode="digraph")

  #Comparing Closeness and Betweenness Centralities
  cor(Alt_Closeness, AHS_Betweenness)                         #Correlate our adjusted measure of closeness with betweenness
  plot(Alt_Closeness, AHS_Betweenness, xlab = "Closeness")    #Plot the bivariate relationship
  
#Measures of Power in Influence Networks: Bonacich and Eigenvector Centrality

  #Bonacich Centrality: The intuition behind Bonacich Power Centrality is that the power of node i is recursively defined
  #...by the sum of the power of its alters.
  #The nature of the recursion involved is then controlled by the power exponent: positive values imply that vertices become
  #...more powerful as their alters become more powerful (as occurs in cooperative relations), while negative values imply
  #...that vertices become more powerful only as their alters become weaker (as occurs in competitive or antagonistic relations).
  ?bonpow   #For more information about the measure

  #Eigenvector Centrality: Conceptually, the logic behind eigenvector centrality is that node i's influence is proportional
  #...to the centralities of the nodes adjacent to node i. In other words, we are important because we know highly connected people.
  #Mathematically, we capture this concept by calculating the values of the first eigenvector of the graph's adjacency matrix.
  ?evcent   #For more information.

  AHS_Eigen <- evcent(AHS_Network)
  AHS_Eigen
  hist(AHS_Eigen, xlab = "Eigenvector Centrality", prob = TRUE)

  #Vertex size proportional to eigenvector centrality
  gplot(AHS_Network, vertex.cex = AHS_Eigen * 10, gmode = "digraph")
  
#Adding Network Attributes to the Node List
#The nodelist built earlier in this script is AHS_Nodes; the combined table is stored as AHS_NodeList.
#Every centrality vector is in the network's vertex order, so the nodelist rows are first
#put in that same order by matching the vertex names against the sequential ID column.
#InDegree and OutDegree are recomputed here as raw counts, because the objects of the same
#name in the workspace were rescaled for use as plotting sizes.
AHS_NodeList <- cbind(
  AHS_Nodes[match(network.vertex.names(AHS_Network), as.character(AHS_Nodes$ID)), ],
  AHS_Betweenness,
  AHS_Closeness,
  AHS_Info,
  Eigen,
  InDegree = degree(AHS_Network, cmode = "indegree"),
  OutDegree = degree(AHS_Network, cmode = "outdegree")
)

###########################
#   POSITIONAL ANALYSIS   #
###########################
#Burt's (1992) measures of structural holes are supported by igraph, and ego-network variants of these measures are supported by egonet;
#...the egonet package is compatible with the sna package.
  
#You can find descriptions and code to run Burt's measures in igraph at: http://igraph.org/r/doc/constraint.html
  
  #Brokerage: The brokerage measure included in the SNA package builds on past work on brokerage (Marsden 1982), but is a more 
  #...explicitly group oriented measure. Unlike Burt's (1992) measure, the Gould-Fernandez measure requires specifying a group variable
  #...based on an attribute. I use race in the example below.
  
    #Brokerage Roles: Group-Based Concept
    #w_I: Coordinator Role (Mediates Within Group Contact)
    #w_O: Itinerant Broker Role (Mediates Contact between Individuals in a group to which the actor does not belong)
    #b_{IO}: Representative: (Mediates incoming contact from out-group members)
    #b_{OI}: Gatekeeper: (Mediates outgoing contact from in-group members)
    #b_O: Liaison Role: (Mediates contact between individuals of two different groups, neither of which the actor belongs to)
    #t: Total or Cumulative Brokerage (Any of the above paths)
  ?brokerage   #for more information

  #Gould-Fernandez brokerage, using Race as the group variable
  AHS_Brokerage <- brokerage(AHS_Network, Race)
  AHS_Brokerage

  #Cumulative brokerage is the "t" column of the observed per-vertex score matrix (raw.nli).
  #The $cl element holds the class (group) memberships passed in, not a brokerage score,
  #so it must not be plotted as "Cumulative Brokerage".
  AHS_CBrokerage <- AHS_Brokerage$raw.nli[, "t"]
  hist(AHS_CBrokerage, xlab="Cumulative Brokerage", prob=TRUE)

  #Sizing vertices by cumulative brokerage, rescaled to the range 0.5-2.5 because raw
  #brokerage counts can be far too large to use directly as a plotting size.
  #The floor of 1 in max() guards against division by zero when nobody brokers.
  gplot(AHS_Network,
        vertex.cex = 0.5 + 2 * AHS_CBrokerage / max(AHS_CBrokerage, 1, na.rm = TRUE),
        gmode="digraph")
  
#Jimi Adams's Function for Calculating Effective Size
  #Effective size is ego's number of alters minus the average number of ties each alter has
  #to ego's other alters (i.e., redundancy among the alters is subtracted out).

  #Detaching to ensure that Statnet and iGraph do not conflict
  detach("package:sna", unload=TRUE)
  library(igraph)

  #Loading Example Data (expects Flo_Edges.Rda and Flo_Nodes.Rda in the working directory)
  load("Flo_Edges.Rda")
  load("Flo_Nodes.Rda")

  #graph_from_data_frame() is the current name of the deprecated graph.data.frame()
  g <- graph_from_data_frame(Flo_Edges)
  #Attaching each vertex's label by matching the vertex name against the nodelist ID
  V(g)$ego <- as.character(Flo_Nodes$ego[match(V(g)$name, Flo_Nodes$ID)])
  V(g)
  #Labels come from the matched vertex attribute, so they follow the graph's vertex order
  #even when that order differs from the row order of Flo_Nodes.
  plot(g, vertex.label = V(g)$ego,
       edge.arrow.size = .05, edge.arrow.width = .05,
       vertex.size = degree(g, mode = "in"))
  
  #effective.size: ego's degree minus the average redundancy among ego's neighbours
  #(igraph doesn't have an "effective size" command)
  #  g:    the graph
  #  ego:  the vertex to evaluate (passed to degree()[ego] and neighbors())
  #  mode: which ties to count; forwarded to degree() and neighbors()
  #Returns ego's degree reduced by r/n for each neighbour, where r is the number of ego's
  #neighbours that the neighbour is itself tied to and n is ego's degree.
  effective.size <- function(g, ego, mode="all") {
    ego_degree <- degree(g, mode = mode)[ego]
    remaining <- ego_degree                          # start from the full degree
    alters <- neighbors(g, ego, mode = mode)         # ego's neighbourhood

    # an ego without ties has nothing redundant to subtract
    if (!(ego_degree > 0)) {
      return(remaining)
    }

    for (k in seq_len(ego_degree)) {
      alters_of_alter <- neighbors(g, alters[k], mode = mode)
      shared <- length(intersect(alters, alters_of_alter))   # ties that stay inside ego's neighbourhood
      remaining <- remaining - (shared / ego_degree)
    }
    return(remaining)
  }
  
  #Effective size of the vertex named "9" in the example graph
  effective.size(g, "9", mode="all")

  #Trying on Our School Networks
  #graph_from_data_frame() is the current name of the deprecated graph.data.frame();
  #vertex names are taken from the edgelist's first two columns.
  AHS_Graph <- graph_from_data_frame(AHS_Edges)
  effective.size(AHS_Graph, "1", mode="all")
  
#Jimi Adams's Function for Calculating the Index of Qualitative Variation
  #The index of qualitative variation (IQV) is a measure of variation among the categories
  #of a qualitative variable.
  #It is calculated as [1 - sum(p^2)] * [K / (K - 1)],
  #where p is the proportion in each category, and K is the number of categories.
  #The variable ranges from 0 to 1, where 0 represents a completely homogeneous group,
  #and 1 represents a group with equal parts in each category.
  #
  #  graph:     the graph whose vertices carry the attribute
  #  attribute: name of the vertex attribute to evaluate
  #Returns a list with two elements:
  #  full_graph: the mean of the per-vertex IQV values
  #  egonet:     one IQV per vertex, computed over that vertex's order-1 neighborhood
  #              as returned by neighborhood(graph, 1)
  #K is the number of distinct attribute values in the whole graph. With a single
  #category the K / (K - 1) factor is undefined and the result is NaN.
  #NOTE(review): unique() counts NA as a category, but NA never matches in the comparison,
  #so missing attribute values raise K without contributing a proportion - confirm intended.

  iqv <- function(graph, attribute) {
    vertices <- V(graph)
    all_values <- get.vertex.attribute(graph, attribute, vertices)
    if (is.null(all_values)) {
      stop("vertex attribute '", attribute, "' not found in graph", call. = FALSE)
    }
    cats <- unique(all_values)
    nlev <- length(cats)

    # The neighborhoods and their attribute values do not depend on the category,
    # so they are computed once here instead of inside the category-by-vertex loop.
    hoods <- neighborhood(graph, 1)
    hood_values <- lapply(hoods, function(members) {
      get.vertex.attribute(graph, attribute, vertices[members])
    })

    # Accumulating, for every vertex, the sum over categories of squared proportions
    sum_p2 <- rep(0, length(vertices))
    for (j in seq_along(cats)) {
      p <- vapply(hood_values, function(values) {
        length(which(values == cats[j])) / length(values)
      }, numeric(1), USE.NAMES = FALSE)
      sum_p2 <- sum_p2 + p * p
    }

    IQV <- (nlev / (nlev - 1)) * (1 - sum_p2)
    list(full_graph = mean(IQV), egonet = IQV)
  } 
  
  #Assigning Attributes: copying sex and grade from the nodelist onto the igraph vertices
  AHS_Graph <- set_vertex_attr(AHS_Graph, "sex", value = AHS_Nodes$sex)
  AHS_Graph <- set_vertex_attr(AHS_Graph, "grade", value = AHS_Nodes$grade)

  V(AHS_Graph)

  #This function takes some time to calculate for a network of this size
  #because the variation ratio is calculated for each person
  #in the passed complete network, for a single attribute at a time.
  iqv(AHS_Graph, "sex")

#Swapping igraph back out for sna so that Statnet and iGraph do not conflict
  detach("package:igraph", unload=TRUE)
  library(sna)

#Structural Equivalence
  #Structural equivalence: Similarity/Distance Measures Include:
    #Correlation
    #Euclidean Distance
    #Hamming Distance
    #Gamma Correlation
  #Pairwise structural-equivalence distances, here using the Hamming distance
  sedist(AHS_Network, mode = "digraph", method = "hamming")

  #Cluster based on structural equivalence, labelling the leaves with the vertex names
  AHS_Clustering <- equiv.clust(AHS_Network, mode = "digraph",
                                plabels = network.vertex.names(AHS_Network))
  AHS_Clustering                        #Specification of the equivalence method used
  plot(AHS_Clustering)                  #Plot the dendrogram
  rect.hclust(AHS_Clustering$cluster, h = 30)   #Boxing the clusters obtained by cutting at height 30

  #Generating a Block Model based on the Structural Equivalence Clustering (same cut height)
  AHS_BM <- blockmodel(AHS_Network, AHS_Clustering, h = 30)
  AHS_BM

  #Extract the block image for Visualization
  bimage <- AHS_BM$block.model
  bimage
  #NaN cells in the block image are set to 1 before plotting
  bimage <- replace(bimage, is.nan(bimage), 1)

  #Visualizing the block image (with self-reflexive ties):
  #edge width follows the block density, vertex size the square root of the block's
  #membership count, and vertex shade the within-block (diagonal) density.
  gplot(bimage, diag = TRUE, edge.lwd = bimage * 5,
        vertex.cex = sqrt(table(AHS_BM$block.membership)) / 2,
        gmode = "graph", vertex.sides = 50, vertex.col = gray(1 - diag(bimage)))