# Social Networks and Health Training Program
# Descriptive Network Analyses
# Jonathan H. Morgan and Molly Copeland

# RESOURCES
# Acton's and Jasny's Statnet Tutorial:
#   https://statnet.org/trac/raw-attachment/wiki/Resources/introToSNAinR_sunbelt_2012_tutorial.pdf
# Wasserman and Faust's (1994) book, Social Network Analysis: Methods and
#   Applications

# Clearing Old Data
rm(list = ls())
gc()

########################
#   LOADING PACKAGES   #
########################

library(plyr)
library(dplyr)
library(tidyr)
library(statnet)
library(ggplot2)
library(ggnetwork)
# library(igraph)
# igraph and sna packages are not compatible. Run one or the other.

# To get more information about the sna and statnet packages.
# The statnet package draws on the sna package to compute the majority of its
# descriptive network statistics.
help(package = "sna")
help(package = "statnet")

######################
#   IMPORTING DATA   #
######################

# Look for an icon in your task bar to select today's data set: ahs_wpvar.csv
# I chose this import strategy because participants may have varying levels of
# familiarity with the computer they are using.
AHS_WPVAR <- read.csv(file.choose(), header = TRUE)

#################################################
#   Creating School 7's Edgelist and Nodelist   #
#################################################

#############################
#   CREATING THE EDGELIST   #
#############################

# Subsetting AHS_WPVAR to isolate schools, then reshaping to long format.
AHS_Edges <- AHS_WPVAR %>%
  # Step 1: Selecting Variables of Interest
  select(ego_nid, mfnid_1:mfnid_5, ffnid_1:ffnid_5, commcnt) %>%
  # Step 2: Filtering to keep only community 7
  filter(commcnt == 7) %>%
  # Step 3: Gathering Columns to Create a Long Data Set
  gather(
    key = "Alter_Label", value = "value",
    mfnid_1:mfnid_5, ffnid_1:ffnid_5,
    na.rm = TRUE
  )

# Step 4: Deleting 99999 values from the data subsets;
# the gather statements have eliminated the other missing values.
# Renaming ego_nid to Sender and value to Target to indicate directionality.
AHS_Edges <- AHS_Edges %>%
  # Eliminating 99999 values: we go from 2,659 edges to 2,099 edges.
  filter(value != 99999) %>%
  # Dropping the now redundant label columns and renaming in one step.
  select(Sender = ego_nid, Target = value)

####################################################
#   CREATING NODELIST AND SEQUENTIAL NUMERIC IDs   #
####################################################

# Steps 1-4: Creating a comprehensive nodelist. Senders and targets are
# stacked into one column, the old column headers are dropped, the column is
# named ego_nid so attributes can be merged in, and duplicates are removed.
# Step 5: Creating sequential numeric IDs because there are gaps in the
# sequence of original ID numbers, which can cause errors.
# row_number() replaces the deprecated add_rownames() and yields the same
# 1..n sequence; the ID is kept as a double and placed first, as before.
AHS_Nodes <- AHS_Edges %>%
  gather(Alter_Label, value, Sender, Target, na.rm = TRUE) %>%
  select(ego_nid = value) %>%
  distinct() %>%
  mutate(ID = as.numeric(row_number())) %>%
  select(ID, ego_nid)

# Step 6: Merging/joining the sequential numeric IDs into the edgelist, once
# for senders and once for targets.
# Step 7: Tidying up. Because the original labels live in the nodelist, the
# old node labels are dropped from the edgelist to avoid inducing errors based
# on gaps in the IDs. The edgelist keeps the column names Sender and Target.
AHS_Edges <- AHS_Edges %>%
  left_join(AHS_Nodes, by = c("Sender" = "ego_nid")) %>%
  rename(Sender_ID = ID) %>%
  left_join(AHS_Nodes, by = c("Target" = "ego_nid")) %>%
  rename(Target_ID = ID) %>%
  select(Sender = Sender_ID, Target = Target_ID)

##########################
#   MERGING ATTRIBUTES   #
##########################

AHS_Attributes <- AHS_WPVAR %>%
  # Step 1: Selecting the Variables of Interest
  select(commcnt, ego_nid, sex, grade, race5) %>%
  # Step 2: Filtering to retain only community 7
  filter(commcnt == 7)

# Step 3: Merging/Joining Attributes with the Nodes File
# We are doing a left_join because we only want attributes for vertices
# (nodes) that appear in our network. Nodes that were nominated but have no
# attribute record keep NA attributes.
AHS_Nodes <- AHS_Nodes %>%
  left_join(AHS_Attributes, by = "ego_nid")

# Step 4: Tidying: Removing Non-essential data sets
rm(AHS_Attributes)

save(AHS_Edges, file = "AHS_Edges.Rda")
save(AHS_Nodes, file = "AHS_Nodes.Rda")

################################################
#   Constructing and Visualizing the Network   #
################################################

# Step 1: Formatting Sender and Target Variables to Construct a Statnet
# Network Object
AHS_Edges[[1]] <- as.character(AHS_Edges[[1]])
AHS_Edges[[2]] <- as.character(AHS_Edges[[2]])

# Step 2: Creating a Network Object
# Note, this is a directed graph. So, we specify that in the network object
# now. The specification of the graph as either directed or undirected is
# important because it impacts fundamentally how we interpret the
# relationships described by the graph.
AHS_Network <- network(AHS_Edges, matrix.type = "edgelist", directed = TRUE)
AHS_Network

# Step 3: Calculating Network Measures to Create Network Attributes for
# Visualization Purposes. More on the measures soon.
Eigen <- evcent(AHS_Network) # Eigenvector centrality of each node
InDegree <- degree(AHS_Network, cmode = "indegree") # In-degree of each node
# Scaling in-degree to avoid high in-degree nodes from crowding out the rest
# of the nodes.
InDegree <- InDegree * .15

# Step 4: Creating Network Attributes
# Specifying Colors for Gender and Race.
# case_when() falls through to the default colour when the attribute is
# missing, so nodes without attribute data are still drawn instead of
# receiving an NA colour.
AHS_Nodes <- AHS_Nodes %>%
  mutate(
    Color_Female = case_when(
      sex == 2 ~ "red",
      TRUE ~ "black"
    ),
    Color_Race = case_when(
      race5 == 0 ~ "gold",
      race5 == 1 ~ "chartreuse4",
      race5 == 2 ~ "blue1",
      race5 == 3 ~ "brown",
      race5 == 4 ~ "purple",
      TRUE ~ "gray0"
    )
  )

# Creating Vectors to Assign as Attributes to the Network.
# The vertex order inside the network object is not guaranteed to equal the
# row order of AHS_Nodes (character vertex names may be ordered differently
# from the numeric IDs), so every attribute vector is aligned to the network
# by vertex name before it is attached.
Vertex_Rows <- match(
  as.character(network.vertex.names(AHS_Network)),
  as.character(AHS_Nodes$ID)
)
stopifnot(!anyNA(Vertex_Rows))

Gender <- as.vector(AHS_Nodes$sex[Vertex_Rows])
Race <- as.vector(AHS_Nodes$race5[Vertex_Rows])
# Important: 2d network plots require a vector for an attribute.
Color_Race <- as.vector(AHS_Nodes$Color_Race[Vertex_Rows])
Color_Female <- as.vector(AHS_Nodes$Color_Female[Vertex_Rows])

# Assigning Attributes to Vertices
set.vertex.attribute(AHS_Network, "Gender", Gender)
set.vertex.attribute(AHS_Network, "Race", Race)
set.vertex.attribute(AHS_Network, "Color_Race", Color_Race)
set.vertex.attribute(AHS_Network, "Color_Female", Color_Female)
set.vertex.attribute(AHS_Network, "InDegree", InDegree)

# Step 5: Visualizing the Network
AHS_Network
summary(AHS_Network) # Get numerical summaries of the network

set.seed(12345)
ggnetwork(AHS_Network) %>%
  ggplot(aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(color = "lightgray") +
  geom_nodes(color = Color_Race, size = InDegree) +
  # geom_nodelabel_repel(color = Race, label = Race) +
  # For networks with fewer nodes, we might want to label.
  theme_blank() +
  geom_density_2d()

############################
#   FUNDAMENTAL CONCEPTS   #
############################

# Node: An entity such as a social actor, firm, or organism.
#   Nodes can represent almost anything, as long as there is some meaningful
#   set of relationships between the entities.
# Edge: A relationship between a pair of nodes where the relationship is
#   nondirectional (e.g., kinship relationships or co-memberships in
#   organizations).
# Arc: A directed relationship such as friendships. I can be friends with
#   Joe, but Joe may not necessarily be my friend. Sad for me.
# Graph: A set of nodes and edges. The relationships are nondirectional and
#   dichotomous (We are either kin or not.)
# Di-Graph: A set of nodes and arcs. The relationships are directional and
#   can either be dichotomous or weighted.
# Network: A graph or di-graph where the nodes have attributes assigned to
#   them such as names, genders, or sizes.

# Basic Measures

# Network Size: We also know this from the number of observations in the
# nodelist.
network.size(AHS_Network)

# Number of Edges: Corresponds to the number of observations in the edgelist.
network.edgecount(AHS_Network)

# Number of Dyads (Node Pairs)
network.dyadcount(AHS_Network)

#############################
#   SYSTEM LEVEL MEASURES   #
#############################

# Density: The ratio of Observed Ties/All Possible Ties
gden(AHS_Network, mode = "digraph")

# Degree Distribution
# Calculating In-Degree and Out-Degree to Visualize the Total Degree
# Distribution: What is the distribution of connectedness?
InDegree <- degree(AHS_Network, cmode = "indegree") # In-degree of each node
OutDegree <- degree(AHS_Network, cmode = "outdegree") # Out-degree of each node

# Set up a 2x2 display with tight margins. par() returns the previous values
# of the settings it changes, so both the layout and the margins can be
# restored afterwards.
Old_Par <- par(mar = rep(2, 4), mfrow = c(2, 2))
hist(InDegree, xlab = "Indegree", main = "In-Degree Distribution",
     prob = FALSE)
hist(OutDegree, xlab = "Outdegree", main = "Out-Degree Distribution",
     prob = FALSE)
hist(InDegree + OutDegree, xlab = "Total Degree",
     main = "Total Degree Distribution", prob = FALSE)
par(Old_Par) # Restore display

# Average Path Length
# Walks: A walk is a sequence of nodes and ties, starting and ending with
#   nodes, in which each node is incident with the edges following and
#   preceding it in the sequence (Wasserman and Faust 1994, p. 105).
#   The beginning and ending node of a walk may be different, some nodes may
#   be included more than once, and some ties may be included more than once.
# Paths: A path is a walk where all the nodes and all the ties are distinct.
# A shortest path between two nodes is referred to as a geodesic
#   (Wasserman and Faust 1994, p. 110).
# Average path length, or the average geodesic distance, is the average
#   number of steps along the shortest paths for all possible pairs of nodes.

# By default, nodes that cannot reach each other have a geodesic distance of
# infinity. Because Inf is the constant for infinity, we need to replace Inf
# values to calculate the shortest path length.
# Here we replace infinity values with 0 for visualization purposes.
AHS_Geo <- geodist(AHS_Network, inf.replace = 0)
# AHS_Geo <- geodist(AHS_Network) # Matrix with Infinity
AHS_Geo

# The length of the shortest path for all pairs of nodes.
AHS_Geo$gdist

# The number of shortest paths for all pairs of nodes.
AHS_Geo$counts

# Shortest Path Matrix
Geo_Dist <- AHS_Geo$gdist
hist(Geo_Dist)

# Average path length over ordered pairs joined by a path. Zeros are the
# diagonal and the unreachable pairs (replaced above), so they are excluded.
Avg_Path_Length <- mean(Geo_Dist[Geo_Dist > 0])
Avg_Path_Length

# For non-zero paths, we see the distribution is approximately centered
# around 4.5.
# If we compare to iGraph's reported value of 4.496353, this seems reasonable.
# average.path.length(AHS_Graph, directed = TRUE, unconnected = TRUE)

# Global Clustering Coefficient: Transitivity
# Transitivity: A triad involving actors i, j, and k is transitive if
#   whenever i --> j and j --> k then i --> k
#   (Wasserman and Faust 1994, p. 243).
gtrans(AHS_Network)

# Weak and Weak Census
# Weak transitivity is the most common understanding, the one reflected in
#   Wasserman's and Faust's definition.
# When 'weak' is specified as the measure, R returns the fraction of
#   potentially intransitive triads obeying the weak condition:
#   Transitive Triads / (Transitive and Intransitive Triads).
# In contrast, when 'weakcensus' is specified, R returns the count of
#   transitive triads.
gtrans(AHS_Network, mode = "digraph", measure = "weak")
gtrans(AHS_Network, mode = "digraph", measure = "weakcensus")

# CUG (Conditional Uniform Graph) Tests: Is this graph more clustered than we
#   would expect by chance?
# See Wasserman and Faust 1994, p. 543-545 for more information.
# Note: These tests are somewhat computationally intensive.
# Conducting these tests, we find that although the transitivity is higher
#   than would be expected by chance given the network's size, it is not
#   greater than would be expected given either the number of edges or dyads.
# Test transitivity against size
Cug_Size <- cug.test(dat = AHS_Network, FUN = gtrans, cmode = "size")
plot(Cug_Size)

# Test transitivity against density
Cug_Edges <- cug.test(dat = AHS_Network, FUN = gtrans, cmode = "edges")
plot(Cug_Edges)

# Test transitivity against the dyad census
Cug_Dyad <- cug.test(dat = AHS_Network, FUN = gtrans, cmode = "dyad.census")
plot(Cug_Dyad)

###########################
#   MESO-LEVEL MEASURES   #
###########################

# Dyads
# Null dyads: Pairs of nodes with no arcs between them
# Asymmetric dyads: Pairs of nodes that have an arc between the two nodes
#   going in one direction or the other, but not both
# Mutual/Symmetric dyads: Pairs of nodes that have arcs going to and from
#   both nodes <-->

# Number of Symmetric Dyads
mutuality(AHS_Network)

# Dyadic Ratio: Ratio of Dyads where (i,j)==(j,i) to all Dyads
grecip(AHS_Network, measure = "dyadic")

# Edgewise Ratio: Ratio of Reciprocated Edges to All Edges
grecip(AHS_Network, measure = "edgewise")

# Directed Triad Census
# Triads can be in Four States
#   Empty: A, B, C
#   An Edge: A -> B, C
#   A Star (2 Edges): A->B->C
#   Closed: A->B->C->A

# Triad types (per Davis & Leinhardt):
#   003   A, B, C, empty triad.
#   012   A->B, C
#   102   A<->B, C
#   021D  A<-B->C
#   021U  A->B<-C
#   021C  A->B->C
#   111D  A<->B<-C
#   111U  A<->B->C
#   030T  A->B<-C, A->C
#   030C  A<-B<-C, A->C.
#   201   A<->B<->C.
#   120D  A<-B->C, A<->C.
#   120U  A->B<-C, A<->C.
#   120C  A->B->C, A<->C.
#   210   A->B<->C, A<->C.
#   300   A<->B<->C, A<->C, completely connected.
triad.census(AHS_Network)

# Hierarchy Measures: Components, Cut Points, K-Cores, and Cliques

# Components: Components are maximally connected subgraphs
#   (Wasserman and Faust 1994, p. 109).
# Recall that community 7 has two large components and several small dyads
#   and triads.
# There are two types of components: strong and weak.
# Strong components are components connected through directed paths
#   (i --> j, j --> i).
# Weak components are components connected through semi-paths
#   (--> i <-- j --> k).
components(AHS_Network, connected = "strong")
components(AHS_Network, connected = "weak")

# Which node belongs to which component?
AHS_Comp <- component.dist(AHS_Network, connected = "strong")
AHS_Comp$membership # The component each node belongs to
AHS_Comp$csize # The size of each component
AHS_Comp$cdist # The distribution of component sizes

# Cut-Points and Cut-Sets: A cut-point is a node whose removal increases the
#   number of components of the graph; the analogous concept for lines is a
#   bridge. Cut-sets generalize this to sets of nodes (or lines) whose
#   removal disconnects the graph (Harary 1969).
# k refers to the number of nodes or lines that would need to be removed to
#   reduce the graph to a disconnected state.
cutpoints(AHS_Network, connected = "strong")
gplot(
  AHS_Network,
  vertex.col = 2 + cutpoints(AHS_Network, mode = "graph",
                             return.indicator = TRUE)
)
# The plot only shows subgraphs consisting of nodes with a degree of 2 or
#   more.
# The green nodes indicate cut-points where the removal of the node would
#   separate one subgraph from another.

# Let's remove one vertex and count components again.
# A negative index drops that row and column, so AHS_Cut is the adjacency
# matrix of AHS_Network with vertex 11 removed.
Cut_Vertex <- 11
AHS_Cut <- AHS_Network[-Cut_Vertex, -Cut_Vertex]
components(AHS_Cut, connected = "strong")
# There are 74 strong components in AHS_Cut compared to 73 in AHS_Network.

# Bi-Components: Bi-components refer to subgraphs that require at least the
#   removal of two nodes or two lines to transform them into a disconnected
#   set of nodes.
# In large highly connected networks, we frequently analyze the properties of
#   the largest bi-component to get a better understanding of the social
#   system represented by the network.
bicomponent.dist(AHS_Network)

# Identify Cohesive Subgroups

# K-Cores: A k-core is a subgraph in which each node is adjacent to at least
#   a minimum number, k, of the other nodes in the subgraph, while a k-plex
#   specifies the acceptable number of lines that can be absent from each
#   node (Wasserman and Faust 1994, p. 266).
kcores(AHS_Network)

# Show the nesting of cores
AHS_kc <- kcores(AHS_Network, cmode = "indegree")
gplot(
  AHS_Network,
  vertex.col = rainbow(max(AHS_kc) + 1)[AHS_kc + 1]
)

# Now, showing members of the 4-core only
# (All Nodes Have to Have a Degree of 4)
gplot(
  AHS_Network[AHS_kc > 3, AHS_kc > 3],
  vertex.col = rainbow(max(AHS_kc) + 1)[AHS_kc[AHS_kc > 3] + 1]
)

# Cliques: A clique is a maximally complete subgraph of three or more nodes.
# In other words, a clique consists of a subset of nodes, all of which are
#   adjacent to each other, and where there are no other nodes that are also
#   adjacent to all of the members of the clique (Luce and Perry 1949).

# We need to symmetrize to recover all ties between i and j.
set.network.attribute(AHS_Network, "directed", FALSE)

# The clique census returns a list with several important elements.
# Let's assign that list to an object we'll call AHS_Cliques.
# The clique.comembership parameter takes values
#   "none"   (no co-membership is computed),
#   "sum"    (the total number of shared cliques for each pair of nodes is
#            computed),
#   "bysize" (separate clique co-membership is computed for each clique size).
AHS_Cliques <- clique.census(
  AHS_Network,
  mode = "graph",
  clique.comembership = "sum"
)
AHS_Cliques # an object that now contains the results of the clique census

# The first element of the result list is clique.count: a matrix containing
#   the number of cliques of different sizes (size = number of nodes in the
#   clique).
# The first column (named Agg) gives you the total number of cliques of each
#   size. The rest of the columns show the number of cliques each node
#   participates in.
# Note that this includes cliques of sizes 1 & 2. We have those when the
#   largest fully connected structure includes just 1 or 2 nodes.
AHS_Cliques$clique.count

# The second element is the clique co-membership matrix:
AHS_Cliques$clique.comemb

# The third element of the clique census result is a list of all found
# cliques (remember that a list can have another list as its element):
AHS_Cliques$cliques # a full list of cliques, all sizes
AHS_Cliques$cliques[[1]] # cliques of size 1
AHS_Cliques$cliques[[2]] # cliques of size 2
AHS_Cliques$cliques[[3]] # cliques of size 3
AHS_Cliques$cliques[[4]] # cliques of size 4

###########################
#   NODE LEVEL MEASURES   #
###########################

# Restoring Our Directed Network
set.network.attribute(AHS_Network, "directed", TRUE)

# Reachability
# An actor is "reachable" by another if there exists any set of connections
#   by which we can trace from the source to the target actor, regardless of
#   how many other nodes fall between them
#   (Wasserman and Faust 1994, p. 132).
# If the network is a directed network, then it is possible for actor i to be
#   able to reach actor j, but for j not to be able to reach i.

# We can classify how connected one node is to another by considering the
# types of paths connecting them.
#   Weakly Connected: The nodes are connected by a semi-path
#     (--> i <--- j ---> k)
#   Unilaterally Connected: The nodes are connected by a path
#     (i --> j --> k)
#   Strongly Connected: The nodes are connected by a path from i to k and a
#     path from k to i.
#   Recursively Connected: The nodes are strongly connected, and the nodes
#     along the path from i to k and from k to i are the same in reverse
#     order, e.g., i <--> j <--> k

# Interpreting the reachability matrix, the first column indicates a specific
#   node, the second an alter (alters can occur multiple times), and the
#   third column indicates the number of paths connecting the two (total is a
#   cumulative count of the number of paths in the network).
# For example, interpreting row 2, node 2 can reach node 235 through 235
#   paths (470-235), whereas in the middle of the list node 343 can reach
#   node 1 through only 1 path.
reachability(AHS_Network)
??reachability # For more information on this measure

# Degree Centrality: Total, In-Degree, Out-Degree
# Raw degrees are kept separately from the scaled copies used for plotting,
# so that total degree is built from real counts rather than from in- and
# out-degrees that were scaled by different factors.
Raw_InDegree <- degree(AHS_Network, cmode = "indegree")
Raw_OutDegree <- degree(AHS_Network, cmode = "outdegree")

# In-Degree Centrality: The number of nodes adjacent to node i
#   (Wasserman and Faust 1994, p. 126). i <--
# Scaling in-degree to avoid high in-degree nodes from crowding out the rest
# of the nodes.
InDegree <- Raw_InDegree * .15
set.vertex.attribute(AHS_Network, "InDegree", InDegree)

# Out-Degree Centrality: The number of nodes adjacent from node i
#   (Wasserman and Faust, p. 126). i -->
# Scaling out-degree to avoid high out-degree nodes from crowding out the
# rest of the nodes.
OutDegree <- Raw_OutDegree * .5
set.vertex.attribute(AHS_Network, "OutDegree", OutDegree)

# Total Degree Centrality: The Total Number of Adjacent Nodes
#   (In-Degree + Out-Degree), scaled once for plotting.
TotalDegree <- (Raw_InDegree + Raw_OutDegree) * .4
set.vertex.attribute(AHS_Network, "TotalDegree", TotalDegree)

# Try Sizing by the Different Degrees
set.seed(12345)
ggnetwork(AHS_Network) %>%
  ggplot(aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(color = "lightgray") +
  geom_nodes(color = Color_Race, size = InDegree) +
  # geom_nodelabel_repel(color = Race, label = Race) +
  # For networks with fewer nodes, we might want to label.
  theme_blank() +
  geom_density_2d()

# Path Centralities: Closeness Centrality, Information Centrality,
#   Betweenness Centrality

# Closeness Centrality: Closeness centrality measures the geodesic distances
#   of node i to all other nodes.
# Functionally, this measure ranges from 0 to 1, and is the inverse average
#   distance between actor i and all other actors
#   (Wasserman and Faust 1994, p. 185).
# This measure does not work well when there are disconnected components
#   because the distances between components cannot be summed as they are
#   technically infinite. There are several workarounds; see Acton and
#   Jasny's alternative below.
AHS_Closeness <- closeness(AHS_Network, gmode = "digraph", cmode = "directed")
AHS_Closeness
hist(AHS_Closeness, xlab = "Closness", prob = TRUE)

# Alternative Approach to Measuring Closeness from the Geodesic Distances
# Matrix, from Acton's and Jasny's Statnet Tutorial.
# The function has its own name so that storing the result in `Closeness`
# does not overwrite it, which would make this section fail when re-run.
Closeness_Alt <- function(x) {
  geo <- 1 / geodist(x)$gdist # Matrix of 1/geodesic distance (1/Inf is 0)
  diag(geo) <- 0 # Define self-ties as 0
  rowSums(geo) # Return sum(1/geodist) for each vertex
}

Closeness <- Closeness_Alt(AHS_Network) # Applying the function
Closeness
hist(Closeness, xlab = "Alt. Closeness", prob = TRUE) # Better behaved!

# Information Centrality: Information Centrality measures the information
#   flowing from node i.
# In general, actors with higher information centrality are predicted to have
#   greater control over the flow of information within a network.
# Highly information-central individuals tend to have a large number of short
#   paths to many others within the social structure.
?infocent # For more information

AHS_Info <- infocent(AHS_Network, rescale = TRUE)
AHS_Info
hist(AHS_Info, xlab = "Information Centrality", prob = TRUE)
gplot(AHS_Network, vertex.cex = AHS_Info * 250, gmode = "graph") # Use w/gplot
# As suggested by the histogram, there is relatively little variation in
#   information centrality in this graph.

# Betweenness Centrality: The basic intuition behind Betweenness Centrality is
#   that the actor between all the other actors in the network has some
#   control over the paths in the network.
# Functionally, Betweenness Centrality is the ratio of the sum of all
#   shortest paths linking j and k that include node i over all the shortest
#   paths linking j and k (Wasserman and Faust 1994, p. 191).
AHS_Betweenness <- betweenness(AHS_Network, gmode = "digraph")
AHS_Betweenness
hist(AHS_Betweenness, xlab = "Betweenness Centrality", prob = TRUE)
gplot(AHS_Network, vertex.cex = sqrt(AHS_Betweenness) / 25, gmode = "digraph")

# Comparing Closeness and Betweenness Centralities
# Correlate our adjusted measure of closeness with betweenness.
cor(Closeness, AHS_Betweenness)
# Plot the bivariate relationship.
plot(Closeness, AHS_Betweenness)

# Measures of Power in Influence Networks: Bonacich and Eigenvector
#   Centrality

# Bonacich Centrality: The intuition behind Bonacich Power Centrality is that
#   the power of node i is recursively defined by the sum of the power of its
#   alters.
# The nature of the recursion involved is then controlled by the power
#   exponent: positive values imply that vertices become more powerful as
#   their alters become more powerful (as occurs in cooperative relations),
#   while negative values imply that vertices become more powerful only as
#   their alters become weaker (as occurs in competitive or antagonistic
#   relations).
?bonpow # For more information about the measure

# Eigenvector Centrality: Conceptually, the logic behind eigenvector
#   centrality is that node i's influence is proportional to the
#   centralities of the nodes adjacent to node i. In other words, we are
#   important because we know highly connected people.
# Mathematically, we capture this concept by calculating the values of the
#   first eigenvector of the graph's adjacency matrix.
?evcent # For more information.
AHS_Eigen <- evcent(AHS_Network)
AHS_Eigen
hist(AHS_Eigen, xlab = "Eigenvector Centrality", prob = TRUE)
gplot(AHS_Network, vertex.cex = AHS_Eigen * 10, gmode = "digraph")

# Adding Network Attributes to the Node List
# The node list is built from AHS_Nodes (AHS_NodeList did not exist before
# this point). The centrality vectors are in network vertex order, so the
# node rows are first aligned to that order by vertex name. Degrees are
# recomputed here so the node list stores real counts rather than the scaled
# values used for sizing nodes in plots.
Vertex_Rows <- match(
  as.character(network.vertex.names(AHS_Network)),
  as.character(AHS_Nodes$ID)
)
stopifnot(!anyNA(Vertex_Rows))

AHS_NodeList <- cbind(
  AHS_Nodes[Vertex_Rows, , drop = FALSE],
  AHS_Betweenness,
  AHS_Closeness,
  AHS_Info,
  Eigen = AHS_Eigen,
  InDegree = degree(AHS_Network, cmode = "indegree"),
  OutDegree = degree(AHS_Network, cmode = "outdegree")
)

###########################
#   POSITIONAL ANALYSIS   #
###########################

# Burt's (1992) measures of structural holes are supported by iGraph, and ego
#   network variants of these measures are supported by egonet; the egonet
#   package is compatible with the sna package.
# You can find descriptions and code to run Burt's measures in igraph at:
#   http://igraph.org/r/doc/constraint.html

# Brokerage: The brokerage measure included in the sna package builds on past
#   work on brokerage (Marsden 1982), but is a more explicitly group oriented
#   measure. Unlike Burt's (1992) measure, the Gould-Fernandez measure
#   requires specifying a group variable based on an attribute. I use race in
#   the example below.
# Brokerage Roles: Group-Based Concept
#   w_I: Coordinator Role (Mediates within-group contact)
#   w_O: Itinerant Broker Role (Mediates contact between individuals in a
#        group to which the actor does not belong)
#   b_{IO}: Representative Role (Mediates outgoing contact from an in-group
#        member to an out-group member)
#   b_{OI}: Gatekeeper Role (Mediates incoming contact from an out-group
#        member to an in-group member)
#   b_O: Liaison Role (Mediates contact between individuals of two different
#        groups, neither of which the actor belongs to)
#   t: Total or Cumulative Brokerage (Any of the above paths)
?brokerage # for more information

AHS_Brokerage <- brokerage(AHS_Network, Race)
AHS_Brokerage

# Cumulative brokerage is the "t" column of the observed score matrix.
# (The `cl` element of the result is the vector of class memberships, i.e.
# the grouping variable itself, not a brokerage score.)
AHS_CBrokerage <- AHS_Brokerage$raw.nli[, "t"]
hist(AHS_CBrokerage, xlab = "Cumulative Brokerage", prob = TRUE)

# Raw brokerage counts can be large, so node sizes are rescaled relative to
# the maximum score to keep the plot readable.
Brokerage_Size <- 0.5 + 2 * AHS_CBrokerage / max(1, max(AHS_CBrokerage))
gplot(AHS_Network, vertex.cex = Brokerage_Size, gmode = "digraph")

# Jimi Adams's Function for Calculating Effective Size
# Effective size is the average degree of the ego network without counting
#   alters' ties to ego.

# Detaching to ensure that Statnet and iGraph do not conflict
detach("package:sna", unload = TRUE)
library(igraph)

# Loading Example Data
load("Flo_Edges.Rda")
load("Flo_Nodes.Rda")

g <- graph.data.frame(Flo_Edges)
V(g)$ego <- as.character(Flo_Nodes$ego[match(V(g)$name, Flo_Nodes$ID)])
V(g)

plot(
  g,
  vertex.label = Flo_Nodes$ego,
  edge.arrow.size = .05,
  edge.arrow.width = .05,
  vertex.size = degree(g, mode = "in")
)

# igraph doesn't have an "effective size" command.
#   g:    an igraph graph
#   ego:  the vertex (name or index) whose effective size is wanted
#   mode: which ties count as adjacency ("all", "in", or "out")
# Returns ego's degree minus, for each neighbor, the share of ego's
# neighborhood that the neighbor is also tied to.
effective.size <- function(g, ego, mode = "all") {
  n <- degree(g, mode = mode)[ego] # ego's degree
  es <- n # initializing effective size
  ns <- neighbors(g, ego, mode = mode) # identifying ego's neighborhood
  if (n > 0) {
    # seq_along() keeps the index inside ns even if degree and the number of
    # neighbors ever disagree.
    for (j in seq_along(ns)) {
      nsns <- neighbors(g, ns[j], mode = mode) # neighbors' neighbors
      r <- length(intersect(ns, nsns)) # only those also in ego's neighborhood
      es <- es - (r / n) # subtracting redundancies
    }
  }
  es
}

effective.size(g, "9", mode = "all")

# Trying on Our School Networks
AHS_Graph <- graph.data.frame(AHS_Edges)
effective.size(AHS_Graph, "1", mode = "all")

# Jimi Adams's Function for Calculating the Index of Qualitative Variation
# The index of qualitative variation (IQV) is a measure of variation among
#   the categories of a qualitative variable.
# It is calculated as [1 - sum(p^2)] * [K / (K - 1)],
#   where p is the proportion in each category, and K is the number of
#   categories.
# The variable ranges from 0 to 1, where 0 represents a completely
#   homogeneous group, and 1 represents a group with equal parts in each
#   category.
#
#   graph:     an igraph graph
#   attribute: name of the vertex attribute to measure
# Returns a list with `full_graph` (the mean IQV across all ego networks) and
#   `egonet` (one IQV per vertex, computed over the vertex's order-1
#   neighborhood as returned by neighborhood()).
# Note: with a single category, K / (K - 1) divides by zero and the result is
#   NaN.
iqv <- function(graph, attribute) {
  vertex_values <- get.vertex.attribute(graph, attribute, V(graph))
  cats <- unique(vertex_values)
  nlev <- length(cats)

  # The neighborhoods do not depend on the category, so they are computed
  # once instead of inside the category-by-vertex loop.
  ego_nets <- lapply(neighborhood(graph, 1), as.integer)

  sum_p2 <- rep(0, length(ego_nets))
  for (j in seq_len(nlev)) {
    p <- vapply(
      ego_nets,
      function(members) {
        member_values <- vertex_values[members]
        # A missing attribute value never counts as a match.
        sum(member_values == cats[j], na.rm = TRUE) / length(members)
      },
      numeric(1)
    )
    sum_p2 <- sum_p2 + p * p
  }

  IQV <- (nlev / (nlev - 1)) * (1 - sum_p2)
  list(full_graph = mean(IQV), egonet = IQV)
}

# Assigning Attributes
# Attributes are matched to vertices by name (the same approach used for the
# example data above) rather than relying on row order.
Graph_Rows <- match(V(AHS_Graph)$name, as.character(AHS_Nodes$ID))
AHS_Graph <- AHS_Graph %>%
  set_vertex_attr("sex", value = AHS_Nodes$sex[Graph_Rows]) %>%
  set_vertex_attr("grade", value = AHS_Nodes$grade[Graph_Rows])

V(AHS_Graph)

# This function takes some time to calculate for a network of this size
# because you are calculating the variation ratio for each person
# in a passed complete network, for a single attribute at a time.
iqv(AHS_Graph, "sex")

# Detaching to ensure that Statnet and iGraph do not conflict
detach("package:igraph", unload = TRUE)
library(sna)

# Structural Equivalence
# Structural equivalence: Similarity/Distance Measures Include:
#   Correlation
#   Euclidean Distance
#   Hamming Distance
#   Gamma Correlation
sedist(AHS_Network, mode = "digraph", method = "hamming")

# Cluster based on structural equivalence:
AHS_Clustering <- equiv.clust(
  AHS_Network,
  mode = "digraph",
  plabels = network.vertex.names(AHS_Network)
)
AHS_Clustering # Specification of the equivalence method used
plot(AHS_Clustering) # Plot the dendrogram
rect.hclust(AHS_Clustering$cluster, h = 30)

# Generating a Block Model based on the Structural Equivalence Clustering
AHS_BM <- blockmodel(AHS_Network, AHS_Clustering, h = 30)
AHS_BM

# Extract the block image for Visualization
bimage <- AHS_BM$block.model
bimage
bimage[is.nan(bimage)] <- 1

# Visualizing the block image (with self-reflexive ties)
gplot(
  bimage,
  diag = TRUE,
  edge.lwd = bimage * 5,
  vertex.cex = sqrt(table(AHS_BM$block.membership)) / 2,
  gmode = "graph",
  vertex.sides = 50,
  vertex.col = gray(1 - diag(bimage))
)