*This do file cleans the new CMS POS Full Hospital File (vDec2018)
*Date: 02/09/2024
*Access file: POS_OTHER_DEC18.csv
********************************************************************************

* The raw extract is a comma-separated text file. -use- only opens Stata .dta
* files and aborts on a CSV, so the file is read with -import delimited-.
* -clear- lets the import run even when another dataset is already in memory;
* case(lower) matches the lower-case variable names used below.
import delimited "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/POS_OTHER_DEC18.csv", varnames(1) case(lower) clear
//started with n=146,696

*Exclude if facility closed before 2006 (51,950 observations deleted)
* A missing termination date compares as larger than any number, so facilities
* with no termination date are kept by this condition.
drop if trmntn_exprtn_dt < 20060101

*Exclude non-hospitals (86,456 observations deleted)
drop if prvdr_ctgry_cd != 1

*Include only acute-care hospitals (1,715 observations deleted) (257 observations deleted)
* First line removes subtypes 2 through 10; second line removes 20 and 24.
* NOTE(review): subtypes 22, 23 and 25-28 in the list below are not removed by
* these two lines -- confirm that is intended.
drop if prvdr_ctgry_sbtyp_cd>1 & prvdr_ctgry_sbtyp_cd<11 
drop if prvdr_ctgry_sbtyp_cd==20 | prvdr_ctgry_sbtyp_cd==24

// 01=Short Term
// 02=Long Term
// 03=Religious Non-Medical Health Care Institutions
// 04=Psychiatric
// 05=Rehabilitation
// 06=Childrens Hospitals
// 07=Distinct Part Psychiatric Hospital
// 11=Critical Access Hospitals
// 20=Transplant Hospitals
// 22=Medicaid Only Short-Term Hospitals
// 23=Medicaid Only Childrens Hospitals
// 24=Medicaid Only Children's Psychiatric
// 25=Medicaid Only Psychiatric Hospitals
// 26=Medicaid Only Rehabilitation Hospitals
// 27=Medicaid Only Long-Term Hospitals
// 28=Rural Emergency Hospital

*Exclude non-Federal Emergency Hospitals ("E") and Federal Emergency Hospitals ("F") (799 observations deleted)
* These rows are identified here by a missing subtype code.
drop if missing(prvdr_ctgry_sbtyp_cd) //drop if prvdr_ctgry_sbtyp_cd >= .

save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_DEC18_cleaned.dta", replace

*Further cleaning on 2/19 to remove hospitals located in US territories or outside the US: see 2.15 do-file (64 observations deleted)

*Analyzing the non-closed hosp from the merged UNC x CMS POS file
*Defining Rural using FORHP Eligible Zip Codes 
*Created new Final Dataset (Feb 19)
*Source file: 
*Date: 02/12/2024 & 02/15/2024
********************************************************************************

* Build the "leftover" extract from CMS_POS_2018_RURAL: rows whose stored
* _merge flag is not 3 and that have no termination date on record.
tempfile tmp

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_RURAL.dta", clear

* One combined condition; a row is removed when either test is true.
drop if _merge == 3 | trmntn_exprtn_dt != .

* Snapshot to a temporary file, then write the permanent copy.
save `tmp'

save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_RURAL_leftover_.dta"

//cleaning FORHP Eligible ZIPs to match CMS_POS_2018
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/Other Data/Rural/forhp-eligible-zips.dta", clear

* This step saves over its own input file, so on a second run ZIP has already
* been renamed and an unconditional -rename- would abort. Rename only while the
* original variable is still present.
capture confirm variable ZIP, exact
if !_rc {
    rename ZIP zip_cd
}

* Convert the ZIP code to numeric so it can be used as the merge key below;
* -destring- leaves an already-numeric variable unchanged.
destring zip_cd, replace

save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/Other Data/Rural/forhp-eligible-zips.dta", replace

//merging FORHP Eligible ZIPs with CMS_POS_2018
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_RURAL.dta", clear

* Keep the flag from the earlier merge under a new name so the merge below can
* create its own _merge.
rename _merge merge1

merge m:1 zip_cd using "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/Other Data/Rural/forhp-eligible-zips.dta" //master=CMS_POS_2018_RURAL

drop if _merge == 2 //drop rural ZIPS without matched hospitals

* 1 when the hospital's ZIP code matched the FORHP eligible-ZIP list, else 0.
gen rural_FORHP_eligible_zip=0
 
replace rural_FORHP_eligible_zip=1 if _merge==3

order rural_FORHP_eligible_zip, a(rural)

order zip_cd, a(ZIP)

//remove hospitals located in US territories or outside the US (64 observations deleted)
* This drop used to run AFTER the file was saved, so the saved dataset still
* contained these hospitals. It now runs before the save.
drop if state_cd == "AS" | state_cd == "FN" | state_cd == "GU" | state_cd == "MP" | state_cd == "MX" | state_cd == "PR" | state_cd == "VI" 

// AS=AMERICAN SAMOA
// FN=INTERNATIONAL
// GU=GUAM
// MP=SAIPAN
// MX=MEXICO  
// PR=PUERTO RICO 
// VI=VIRGIN ISLANDS

save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_RURAL_newdef.dta", replace //merge1 = CMSPOS2018 x UNCSheps; _merge = CMSPOS2018 x forhp-eligible-zips

***Sorting discrepancies between Sheps FORHP Rural/Urban def vs. FORHP eligible zips
//from UNC Sheps: ID FORHPRURALURBAN2021 [added x2] ZIP [added] FIPS CBSA2017 [added] RUCACode SPECIALPAYMENT

* -clear- is needed: the dataset in memory may hold unsaved changes, and -use-
* aborts in that case.
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_RURAL_newdef.dta", clear

list ID if rural==1 & rural_FORHP_eligible_zip!=1 //1 hosp (ID 111318) identified as Rural by UNCSheps, Urban by eligiblezips, confirmed as Rural

* Select the hospital by its ID instead of by row number: a row position
* changes whenever the data are re-sorted or rows are dropped upstream.
* ID is a string here (it is destring-ed into pn elsewhere in this file).
replace rural_FORHP_eligible_zip = 1 if ID == "111318" //permanently replace ID: 111318 (Monroe County Hospital) to Rural

* Write the correction back to disk; without this save the file was reloaded
* below and the correction was lost.
save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_RURAL_newdef.dta", replace

//trial sort to determine which are the "new" rural hospitals not in Sheps2018 hospital list
* Exploratory only: narrows the data in memory for inspection and then clears
* it. Nothing from this step is written to disk.
keep if merge1 == 3 & rural!=1 & rural_FORHP_eligible_zip==1

clear

***REDID Do-file 2.12 PE_rural_dataset_FORHP... to create net new FINAL DATASET

//Hospital Obs: 5,455 hospitals (including ones that closed in between time period) --> final Obs: 5,462 hospitals
//Weird: 7 obs from PE dataset NOT in CMSPOS2018 Hospital List -- why?
//   190182 - insample, but says term date 2005
// 	 390155 - not in sample, says term date 1996
// 	 390167 - not in sample, says term date 1997
// 	 450623 - in sample, but says term date 2005
// 	 450633 - not in sample, says term date 1999
// 	 453089 - not in sample, rehab
// 	 504008 - not in sample, psych hospital

* Step 1: one row per hospital ID holding the earliest PE deal year (2004+).
tempfile tmp

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/PE Dataset/clean_pe_list_long_RURAL.dta"

* Rows with a missing deal year are retained (missing sorts above 2004).
keep if pe_deal_yr >= 2004

bysort ID: egen yearPEdeal = min(pe_deal_yr)

keep ID yearPEdeal

duplicates drop

save `tmp'

* Step 2: attach the PE deal year to the hospital list.
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_RURAL_newdef.dta", clear

describe

duplicates report ID

keep ID rural_FORHP_eligible_zip cbsa_urbn_rrl_ind RUCACode SPECIALPAYMENT ///
     fac_name city_name st_adr state_cd bed_cnt ///
     prvdr_ctgry_cd prvdr_ctgry_sbtyp_cd orgnl_prtcptn_dt trmntn_exprtn_dt ///
     chow_cnt chow_dt chow_prior_dt elgblty_sw pgm_prtcptn_cd ///
     physn_cnt rn_cnt emplee_cnt pgm_trmntn_cd gnrl_cntl_type_cd ///
     gnrl_fac_type_cd colctn_stus_sw mdcd_mdcr_prtcptg_prvdr_sw merge1

merge 1:1 ID using `tmp'
//master=CMSPOS2018_rural_newdef, using=PE dataset

* _insample is 0 only for rows that exist in the PE list alone.
gen _insample = (_merge != 2)

* everPE is 1 when a deal year was attached, 0 otherwise.
gen everPE = (yearPEdeal < .)

tab everPE, m 

* Step 3: expand to 19 rows per hospital and number them 2004 through 2022.
expand 19

bysort ID: gen year = 2003 + _n

tab year , m 

* currentPE is 1 from the year AFTER the deal year onward; a missing deal year
* never compares as smaller, so those hospitals stay 0.
gen currentPE = (yearPEdeal < year)

order year, after(ID)

save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2.dta"

*Add everPE = 1 for hospitals under PE deals prior to 2004 //***These hospitals do not have corresponding dates for yearPEdeal
// prvdr_num
// 140054
// 190202
// 220174
// 180007
// 320009
// 320017
// 320074
// 450130
// 140207
// 220098
// 320019
// 450058
describe ID

* Numeric copy of the provider ID; the hospital-year merges further down use
* pn as their key.
destring ID, gen(pn)

* The twelve provider numbers listed above, kept in one place so both flags are
* guaranteed to use the same list.
local pre2004_pe 140054, 190202, 220174, 180007, 320009, 320017, 320074, 450130, 140207, 220098, 320019, 450058

* Every year-row of these hospitals is flagged, because year starts at 2004.
replace everPE = 1 if inlist(pn, `pre2004_pe')
replace currentPE = 1 if inlist(pn, `pre2004_pe')

* Re-save: the file was written before pn and these flag edits existed, and the
* next step reloads another dataset, which would discard them. The later merge
* on pn and year reads this file and needs pn to be present.
save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2.dta", replace

*Sorts the PE dataset by completing a rural-urban cross check using FORHP HRSA website
*Creates the merged CMSPOS_FINAL_DATASET
*Date: 02/12/2024
********************************************************************************

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/PE Dataset/clean_pe_list_long_RURAL.dta", clear

order FORHPRURALURBAN2021, after(ID)

*manually input rural/urban for the missing values


*new variables to track hospital closures and year
* Both start empty (0 / missing); no values are filled in by this block.
gen closure = 0
gen closure_year = .

* Places closure and then closure_year directly after pe_hosp_state1.
order closure closure_year, after(pe_hosp_state1)

* rural is 1 when the FORHP designation is exactly "Rural", 0 otherwise.
gen rural = (FORHPRURALURBAN2021 == "Rural")
order rural, after(FORHPRURALURBAN2021)

rename FORHPRURALURBAN2021 FORHP
* NOTE(review): nothing in this block saves the dataset, so these edits exist
* only in memory at this point.

*create a new file merging PE list and 2018 CMS POS Hospital List (FINAL!)
tempfile tmp

* -clear- added: when the dataset in memory has unsaved edits, -use- without
* -clear- aborts. Only ID and pe_deal_yr are read from this file here.
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/PE Dataset/clean_pe_list_long_RURAL.dta", clear

drop if pe_deal_yr < 2004

sort ID

* Earliest remaining PE deal year for each hospital ID.
egen yearPEdeal = min(pe_deal_yr), by(ID)

keep ID yearPEdeal

* Collapse to one row per ID.
duplicates drop

save `tmp'

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_RURAL.dta", clear

describe

duplicates report ID

keep ID FORHPRURALURBAN2021 ZIP FIPS CBSA2017 RUCACode SPECIALPAYMENT fac_name city_name st_adr state_cd prvdr_ctgry_cd prvdr_ctgry_sbtyp_cd orgnl_prtcptn_dt trmntn_exprtn_dt chow_cnt chow_dt chow_prior_dt elgblty_sw bed_cnt pgm_prtcptn_cd physn_cnt rn_cnt emplee_cnt

merge 1:1 ID using `tmp' //master=CMS POS 2018, using=PE dataset

* 0 only for rows that exist in the PE list alone.
gen _insample = _merge != 2

gen everPE = !missing(yearPEdeal)

tab everPE, m 

* 19 rows per hospital, numbered 2004 through 2022.
expand 19

sort ID

by ID : gen year = 2003 + _n

tab year , m 

* NOTE(review): this version turns currentPE on IN the deal year (<=), while
* the FINAL_DATASET_2 build uses a strict < (on from the following year).
* Confirm which timing is intended before comparing the two files.
gen currentPE = yearPEdeal <= year

* -replace- so the script can be re-run once the file exists.
save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_FINALDATASET.dta", replace

*clean CMSPOS_FINAL_DATASET
* Operates on the dataset currently in memory.
* NOTE(review): nothing here saves the result; add a save if these labels are
* meant to persist in the file.

rename FORHPRURALURBAN2021 FORHP

order state_cd, a(st_adr)

order ZIP, a(state_cd)

order city_name, a(st_adr)

label var fac_name "Facility Name"
label var city_name "City Name"
label var st_adr "Address"
label var state_cd "State of Facility"
label var prvdr_ctgry_cd "Provider Type"
label var prvdr_ctgry_sbtyp_cd "Provider Subtype"
* Fixed: "Facility Open Date" had been attached to the termination date, and
* "Facility Closure Date" had been attached to prvdr_ctgry_cd, which overwrote
* its "Provider Type" label and left orgnl_prtcptn_dt unlabeled.
label var orgnl_prtcptn_dt "Facility Open Date"
label var trmntn_exprtn_dt "Facility Closure Date"
label var chow_cnt "# Times Prvdr Change of Ownership"
label var chow_dt "Date of Prvdr Change of Ownership (Most Recent)"
label var chow_prior_dt "Date of Previous Prvdr Change of Ownership"
label var elgblty_sw "Y/N Eligibility for Medicare/Medicaid"
label var bed_cnt "Bed count"
label var pgm_prtcptn_cd "1=Medicare only, 2=Medicaid only, 3=Both"
label var physn_cnt "# FTE Physicians"
label var rn_cnt "# FTE RNs"
label var emplee_cnt "# FTEs Total"

*summary stats (3) CMS HCRIS using asdoc
* Loads the merged HCRIS file, (re)installs the asdoc package from SSC, and
* routes a -summarize- of the listed variables through asdoc.
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/hospital-cost-report-merged/hcris_merged.dta", clear
ssc install asdoc, replace
asdoc summarize year beds_adultped beds_total ccr ccr_prog ///
    ipdischarges_adultped income totcost margin netpatrev opexp ///
    iphosprev ipgenrev iptotrev optotrev tottotrev nonmcbaddebt ///
    costuccare_v2010 prog_op_cost prog_rt_chg

*clean CMS_POS_2018_RURAL
* Load the file this step is meant to clean. Previously the commands ran
* against whatever dataset was in memory (the HCRIS summary file loaded just
* before), not CMS_POS_2018_RURAL.
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_RURAL.dta", clear

* Create the flag only when it does not exist yet, so re-running is harmless.
capture confirm variable rural, exact
if _rc {
    * rural is 1 when the FORHP designation is exactly "Rural", 0 otherwise.
    gen rural = strmatch(FORHPRURALURBAN2021, "Rural")
    order rural, a(FORHPRURALURBAN2021)

    * Persist the new variable; other steps in this file read rural from
    * datasets derived from this one.
    save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_RURAL.dta", replace
}

* Merge the full-year HCRIS hospital-year file into the hospital-year panel.
* NOTE(review): the _cleaned_FY file read here is written by a later section
* of this script, so that section has to be run first.
tempfile tmp

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/hospital-cost-report-merged-hospyear/hcris_merged_hospyearv12_dropped_merged_cleaned_FY.dta", clear

keep pn year beds_total_wtd bed_cnt ccr_wtd ccr_winsor ipdischarges_adultped ipdischarges_winsor income totcost margin uccare_chg_harmonized uccare_cost_harmonized netpatrev opexp iphosprev tottotrev nonmcbaddebt frac_year_covered covg_begin_dt covg_end_dt flag_short flag_long trmntn_exprtn_dt merge_cmspos pe_deal_yr closure closure_year merge_PEdata nscans one nscans_updated

save `tmp'

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2.dta", clear

* FINAL_DATASET_2 is saved with the _merge flag from its PE-list merge still in
* it, and -merge- aborts when _merge already exists. That flag is already
* summarised in _insample, so it can be dropped.
capture drop _merge

merge 1:1 pn year using `tmp' //using=hcrisv12_dmc_FY

drop pe_deal_yr merge_PEdata merge_cmspos

*Relabling for Tables
* Applied BEFORE the save so the labels are stored in the file; previously
* they were set after the save and discarded by the next -use-.

label var bed_cnt "Bed Count"
label var physn_cnt "Physician FTE"
label var rn_cnt "RN FTE"
label var ccr_winsor "CCR, Winsorized 1%"
label var ipdischarges_winsor "Inpatient Discharges, Winsorized 1%"
label var income "Total Income"
label var totcost "Total Cost"
label var margin "Profit Margin"
label var iphosprev "Inpatient Hospital Revenue" 
label var tottotrev "Total Patient Revenue (IP & OP)"
label var netpatrev "Net Patient Revenue"

save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2_MERGED.dta", replace

*Merge in Info from Adam Sacarny pos.v12.dta

* -clear- added: the dataset in memory may carry unsaved edits, and -use-
* aborts in that case.
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/provider-of-services/pos.v12.dta", clear

* indexnot() returns the position of the first character of pn that is not in
* the listed set, or 0 when every character is allowed. Rows with a non-zero
* result would break -destring-, so they are dropped first.
generate byte non_numeric = indexnot(pn, "0123456789.-")
drop if non_numeric > 0
destring pn, replace

* -replace- so the script can be re-run once the file exists.
save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/provider-of-services/pos.v12_AHedits.dta", replace

*Cleaning NEW CMS HCRIS - Cost Data v12*
*Source file: 
*Date: 02/28/2024
********************************************************************************
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/hospital-cost-report-merged-hospyear/hcris_merged_hospyear.v12.dta", clear

*Observations Before Cleaning //START: n=153,287
* Count of provider-year rows per year.
histogram year, discrete xtitle("Years") title("PN-Year Count")

//From AS: As in the source data from CMS, hospitals that close will persist in the data but will have a termination code and termination date.
//Hospitals that merge, change subtype, or change type of control will usually get a new provider number, which means their old provider number will terminate (and remain in the data).
//Relevance: hospitals that close and reopen will have a different pn, so one hospital may show up in the data multiple times.

*Cleaning, Following 2.19 Do-File 
* Keep the original identifier as ID and add a numeric copy called pn.
rename pn ID
destring ID, generate(pn)

//Cleaning bed counts *NOTE: replaced original variable, did not make new var
// The variable edited below is beds_total_wtd (the heading above originally
// named beds_adultped_wtd).
// Each correction is a pair: -list- shows the values for one provider number,
// then -replace ... in N- overwrites specific rows by position.
// NOTE(review): the row numbers are only valid for the exact row order the
// dataset had when these were written. Any re-sort or dropped row upstream
// silently moves the edits to other hospital-years. Consider selecting on pn
// and year instead.
//high beds
list beds_total_wtd if pn == 312018
replace beds_total_wtd = 76 in 62809
list beds_total_wtd if pn == 312018
list beds_total_wtd if pn == 131316
replace beds_total_wtd = 21 in 25300
list beds_total_wtd if pn == 131316
list beds_total_wtd if pn == 370215
replace beds_total_wtd = 99 in 76854
list beds_total_wtd if pn == 223300
replace beds_total_wtd = 112 in 46706
replace beds_total_wtd = 112 in 46707
// A leading zero in a numeric literal is ignored, so 033025 is 33025.
list beds_total_wtd if pn == 033025
replace beds_total_wtd = 60 in 4047
list beds_total_wtd if pn == 310028
replace beds_total_wtd = 132 in 61943
list beds_total_wtd if pn == 050179
replace beds_total_wtd =  209 in 7967
list beds_total_wtd if pn == 340129
replace beds_total_wtd = 123 in 69275
 replace beds_total_wtd = 123 in 69276
list beds_total_wtd if pn == 050567
replace beds_total_wtd = 465 in  10409
replace beds_total_wtd = 465 in  10410
// pn 450108 is listed for inspection only; no correction follows it.
list beds_total_wtd if pn == 450108
list beds_total_wtd if pn == 450056
replace beds_total_wtd = 410 in  90560
 replace beds_total_wtd = 410 in  90561
list beds_total_wtd if pn == 450056
list beds_total_wtd if pn == 521357
replace beds_total_wtd = 25 in 107584
replace beds_total_wtd = 25 in 107585
list beds_total_wtd if pn == 230041
replace beds_total_wtd = 356 in 47396
replace beds_total_wtd = 356 in 47397
list beds_total_wtd if pn ==140158
list bed_cnt if pn ==140158
// NOTE(review): rows 83896-83897 sit between the rows edited above for
// pn 370215 (76854) and pn 450056 (90560). If the data are ordered by
// provider number, they may not belong to pn 140158 -- confirm.
replace beds_total_wtd = 279 in 83896
replace beds_total_wtd = 279 in 83897
//low beds
list beds_total_wtd if pn == 101311
replace beds_total_wtd = 25 in 20029
replace beds_total_wtd = 25 in 20030
// pn 340097 is listed for inspection only; no correction follows it.
list beds_total_wtd if pn == 340097
list beds_total_wtd if pn == 450758
replace beds_total_wtd = 33 in 93898
replace beds_total_wtd = 33 in 93899
replace beds_total_wtd = 33 in 93900
replace beds_total_wtd = 33 in 93901

//Exclusion criteria
* The pasted Results-window text "(43,417 observations deleted)" used to sit
* inside the command, which is a syntax error; it is now part of the comment.
drop if year < 2004 //(43,417 observations deleted) n=109,870

* Remove bed-detail, revenue-detail and charity/uncompensated-care variables
* that are not used later in this script.
drop beds_adultped_wtd beds_adultped_min beds_adultped_max beds_totadultped_wtd beds_totadultped_min beds_totadultped_max icu_beds_wtd icu_beds_min icu_beds_max ccu_beds_wtd ccu_beds_min ccu_beds_max bicu_beds_wtd bicu_beds_min bicu_beds_max sicu_beds_wtd sicu_beds_min sicu_beds_max othspec_beds_wtd othspec_beds_min othspec_beds_max availbeddays_adultped ipbeddays_adultped ipicrev iprcrev ipancrev ipoprev opancrev totinitchcare ppaychcare chguccare prog_op_cost prog_rt_chg prog_net_chg prog_chg costuccare_v2010 costchcare

* -replace- so the script can be re-run once the file exists.
save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/hospital-cost-report-merged-hospyear/hcris_merged_hospyearv12_dropped.dta", replace


*Merging HCRISv12 with CMSPOS2018, following 2.25 Do-File
* Attaches the hospital list to every HCRIS hospital-year row by pn and keeps
* only rows whose pn appears in that list.
tempfile tmp

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/hospital-cost-report-merged-hospyear/hcris_merged_hospyearv12_dropped.dta", clear

save `tmp'

merge m:1 pn using "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_final_preextension_tmp.dta" //master=CMS_HCRISv12

* Rows found only in HCRIS (not in the CMSPOS_final2 hospital list) are removed.
keep if _merge != 1 //(25,464 observations deleted)

* Drop the hospital-list variables that came in with the merge, plus the
* original string ID.
drop ID yearPEdeal rural_FORHP_eligible_zip cbsa_urbn_rrl_ind RUCACode ///
     SPECIALPAYMENT prvdr_ctgry_sbtyp_cd prvdr_ctgry_cd chow_cnt chow_dt ///
     city_name elgblty_sw fac_name orgnl_prtcptn_dt chow_prior_dt state_cd ///
     st_adr pgm_trmntn_cd gnrl_cntl_type_cd gnrl_fac_type_cd colctn_stus_sw ///
     mdcd_mdcr_prtcptg_prvdr_sw pgm_prtcptn_cd physn_cnt rn_cnt emplee_cnt ///
     merge1 merge2

* Free the name _merge for the next merge.
rename _merge merge_cmspos

save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/hospital-cost-report-merged-hospyear/hcris_merged_hospyearv12_dropped-TEMPORARY.dta"


*Merging HCRISv12 with PEhosp, following 2.25 Do-File
tempfile tmp1

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/PE Dataset/clean_pe_list_long_RURAL.dta"

* Reduce the PE list to the four columns needed, one row per pn. With -force-
* the first row of each pn is the one retained, so pe_deal_yr is whichever
* deal happens to come first in the file for hospitals with several rows.
keep pn pe_deal_yr closure closure_year
duplicates drop pn, force
save `tmp1'

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/hospital-cost-report-merged-hospyear/hcris_merged_hospyearv12_dropped-TEMPORARY.dta", clear

merge m:1 pn using `tmp1' //using = pe_list; merge_cmspos refers to the hcris x cmspos merge

* Keep this merge's flag under its own name.
rename _merge merge_PEdata

save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/hospital-cost-report-merged-hospyear/hcris_merged_hospyearv12_dropped_merged.dta"


***CLEANING****

*Re-organizing new CMS_HCRIS_v12_dropped_merged file
* Places bed_cnt, ccr_wtd, ccr_min, ccr_max, in that order, right after
* beds_total_max.
order bed_cnt ccr_wtd ccr_min ccr_max, after(beds_total_max)

//Sorting hospitals by number of years of cost data: nscans
* nscans = number of rows (hospital-years) that share the same pn.
bysort pn: gen nscans = _N
list pn if nscans==1

//Counter for number of distinct hospitals
* one is 1 on the first row of each pn and missing on all other rows.
bysort pn: gen one = cond(_n == 1, 1, .)
quietly summarize one, meanonly
local N = r(N)
* Formatted copy of the hospital count, stored in the global macro N.
global N: di %12.0fc `N'
count if one==1 //n=5,462 hospitals

*Labeling New Variables
label variable closure "Closure Dummy - From PE Dataset"
label variable closure_year "Closure Year - From PE Dataset"
label variable nscans "Number of Hospital-Years for that PN"
label variable one "Counter for Distinct Hospitals"


*Cleaning Y Variables: n=84,434 observ
count if income == 0 //n=1,494

//From AS: In the synthetic calendar year data, a value is set to missing if any embodied cost report had a missing value. Note: dollar variables (e.g. costs, charges, etc.) are recoded to 0 in the report-level data.

//1. CCR
//Winsorized at the 1% and 99% level -- CCR is a calculation, not a submitted datapoint, so it is winsorized to cut out some of the noise
summarize ccr_wtd, detail
winsor ccr_wtd, p(.01) gen(ccr_winsor)
order ccr_winsor, after(ccr_wtd)

//2. Inpatient Discharges
summarize ipdischarges_adultped, detail
winsor ipdischarges_adultped, p(.01) gen(ipdischarges_winsor)
order ipdischarges_winsor, after(ipdischarges_adultped)

//3. Income -- zeros are treated as not reported
replace income = . if income == 0 //(1,494 real changes made, 1,494 to missing)

//4. Total Cost -- zero and negative values set to missing
summarize totcost, detail
sort totcost //n=6 obs have a (-) totcost
replace totcost = . if totcost <=0 //(626 real changes made, 626 to missing)

//5. Margin -- blank out the single smallest value (first row after sorting)
sort margin
replace margin = . if _n == 1 //**Dropped one because it was WAYYY too unreasonable

//6. Net Patient Revenue   (1,501 real changes made, 1,501 to missing)
//7. Operating Expenses    (621 real changes made, 621 to missing)
//8. Total Patient Revenue (sum of inpatient and outpatient revenue) (1,487 real changes made, 1,487 to missing)
* Same zero-to-missing recode for each of the three variables, in this order.
foreach yvar of varlist netpatrev opexp tottotrev {
    replace `yvar' = . if `yvar' == 0
}

//9. Cost per Patient Discharge - Newly Generated Variable!
gen cost_per_discharge = totcost/ipdischarges_winsor //Note: used winsorized discharges
order cost_per_discharge, after(margin)
label variable cost_per_discharge "totcost/ipdischarges_winsor (new)"

//10. RN FTE and Physician FTE -- changed much later on 3/14 on FINAL_DATASET_insample
* physn_cnt and rn_cnt are dropped from this HCRIS file when it is merged with
* the hospital list, so these commands only make sense on a dataset that still
* carries them. Run them when the variables are present and skip them
* otherwise, instead of aborting before the save below.
capture confirm variable physn_cnt rn_cnt
if _rc {
    display as text "physn_cnt/rn_cnt not in memory; skipping staffing-count cleaning"
}
else {
    winsor physn_cnt, p(.01) gen(physn_cnt_winsor)  //Note: physician CNT data has lots of 0s
    * Copy of physn_cnt that is missing wherever the count is 0.
    clonevar adj_physn_cnt = physn_cnt if physn_cnt != 0
    order physn_cnt_winsor, a(physn_cnt)
    order adj_physn_cnt, a(physn_cnt)

    twoway scatter bed_cnt rn_cnt
    * Copy of rn_cnt with values of 10000 or more (and missings) left missing.
    clonevar adj_rn_cnt = rn_cnt if rn_cnt < 10000 //Note: use adj_rn_cnt, it looks pretty reasonable AFTER I cut out the VERYYYY unreasonable ones (n=2)
    order adj_rn_cnt, a(rn_cnt)
    winsor rn_cnt, p(.01) gen(rn_cnt_winsor)
    order rn_cnt_winsor, a(rn_cnt)
}


save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/hospital-cost-report-merged-hospyear/hcris_merged_hospyearv12_dropped_merged_cleaned.dta", replace


* New Version for full-year cost reports ONLY
* Rows with frac_year_covered different from 1 (including missing) are removed.
keep if frac_year_covered == 1 //(4,680 observations deleted)

//Redid "Cleaning Y Variables" Part from above

* Rows per pn that remain after the full-year restriction.
sort pn
by pn: gen nscans_updated = _N
label variable nscans_updated "Nscans for full year cost reports only"

save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/hospital-cost-report-merged-hospyear/hcris_merged_hospyearv12_dropped_merged_cleaned_FY.dta"



*Merging POS (pos.v12_AHedits) data into CMS_POS_2018_FINAL_DATASET_2
*Source file: 
*Date: 02/29/2024
********************************************************************************
tempfile tmp

* Load the POS extract first. Previously -keep- ran on whatever dataset was in
* memory, which at this point in the script is the HCRIS file, not the POS
* file named in the merge comment below.
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/provider-of-services/pos.v12_AHedits.dta", clear

keep pn year active termcode termdate prev_pn cah provider_subtype typ_control nonprofit forprofit govt

save `tmp'

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2_MERGED.dta", clear

* The master file may already contain _merge from the merge that created it;
* -merge- aborts when that variable exists.
capture drop _merge

merge 1:1 pn year using `tmp' //using=pos.v12_AHedits

drop if _merge == 2 //dropped observations from pos.v12 not relevant to my cmspos2018 dataset (171,161 observations deleted)

save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2_MERGED.dta", replace 


* Summary Stat for FINAL_DATASET_2
* Mean, SD and frequency of the listed variables by rural/urban status,
* exported to a Word table.

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2_MERGED.dta", clear
drop emplee_cnt //no values for any of the data

* Variables summarised; the same list feeds both the mean and the sd rows.
local sumvars bed_cnt ipdischarges_winsor income totcost margin netpatrev iphosprev tottotrev ccr_winsor physn_cnt rn_cnt

table ( var ) ( rural_FORHP_eligible_zip ) (), ///
    statistic(mean `sumvars') ///
    statistic(sd `sumvars') ///
    statistic(frequency) ///
    nformat(%5.2f mean sd) sformat((%s) sd) style(table-1)

* Column headers for the rural flag and the total column.
collect label levels rural_FORHP_eligible_zip 0 "Urban" 1 "Rural" .m "Total", modify

collect style cell, halign(left)

collect export "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/Results/Rural-Urban SumStat2.docx", as(docx) replace


**Purpose: append hospital characteristics to the final list of hospitals sheet (preextension)
tempfile tmp

* -clear- added: the dataset in memory may carry unsaved edits, and -use-
* aborts in that case.
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/provider-of-services/pos_lastyear.v12.dta", clear

keep pn active termcode termdate prev_pn shortterm cah provider_subtype typ_control nonprofit forprofit govt beds_tot lastyear

save `tmp'

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_final_preextension_new.dta", clear

* Guard against a _merge left in the file by an earlier run.
capture drop _merge

merge 1:1 pn using `tmp' //using=pos.v12_lastyear

* Discard POS rows that have no counterpart in the hospital list.
drop if _merge == 2

* Remove the merge flag before saving: the next step merges this same file
* again, and -merge- aborts when _merge already exists.
drop _merge

save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_final_preextension_new.dta", replace

* Flag hospitals that have at least one full-year cost report (insample)
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/hospital-cost-report-merged-hospyear/hcris_merged_hospyearv12_dropped_merged_cleaned.dta", clear

* Write the list of distinct pn values with frac_year_covered == 1.
preserve
 keep if frac_year_covered == 1
 keep pn
 duplicates drop
 save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/hospital-cost-report-merged-hospyear/hcris_dropped_fracyearcovered.dta", replace
restore

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_final_preextension_new.dta", clear

* The hospital list is saved with _merge from its previous merge, and -merge-
* aborts when that variable already exists.
capture drop _merge

merge 1:1 pn using "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS HPCRIS/Adam Sacarny Data - HCRIS/hospital-cost-report-merged-hospyear/hcris_dropped_fracyearcovered.dta"

* insample is 1 when the hospital matched the full-year pn list, 0 otherwise.
* Rows that exist only in the pn list are kept and get 0.
gen insample = (_merge == 3)

save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_final_preextension_new.dta", replace

* Edits to preextension dataset (March 3)
//"active" is updated; use "active" instead of "closure"
* NOTE(review): every edit below addresses rows by position after a -sort-.
* -sort- does not keep tied rows in a fixed order, and any upstream change to
* the row count moves these positions, so the edits are only correct for the
* exact dataset they were written against. Re-check each row (or rewrite the
* edits to select on pn) before relying on a re-run.
sort _merge
replace active = 0 in 3
replace active = 0 in 9
replace active = 1 in 1
replace active = 1 in 2
replace active = 1 in 4
replace active = 1 in 5
replace active = 1 in 6
replace active = 1 in 7
replace active = 1 in 8
replace active = 1 in 10
replace active = 1 in 11
replace active = 1 in 12
replace active = 1 in 13
replace active = 1 in 14
replace active = 1 in 15
replace active = 1 in 16
replace active = 1 in 17
replace active = 1 in 18
replace active = 1 in 19
replace active = 1 in 20
replace active = 1 in 21
replace active = 1 in 22
replace active = 1 in 23
label var closure "Generated from CMSPOS_2018" //closure NOT updated
replace closure = 1 if active == 0 //updated old closure measure to match "active"
label var closure "Generated from CMSPOS_2018, updated to match 'active'" //closure updated


//missing Rural-Urban designation
sort _merge
replace fac_name = "TULANE-LAKESIDE HOSPITAL" in 4202
replace city_name = "METAIRIE" in 4202
replace state_cd = "LA" in 4202
replace rural_FORHP_eligible_zip = 0 in 4202
replace fac_name = "NORTHEAST MEDICAL CENTER" in 1999
replace city_name = "BONHAM" in 1999
replace state_cd = "TX" in 1999
replace rural_FORHP_eligible_zip = 1 in 1999
replace rural_FORHP_eligible_zip = 1 in 1338
replace fac_name = "PALESTINE REGIONAL MEDICAL CENTER" in 1338
replace city_name = "PALESTINE" in 1338
replace state_cd = "TX" in 1338
replace closure = . in 1338
sort pgm_trmntn_cd
replace closure = . in 5457
replace closure = . in 5458
replace closure = . in 5459
replace closure = . in 5460
replace closure = . in 5461
replace closure = . in 5462

* Persist the manual edits. Previously the file was reloaded right below
* without a save, which aborts on unsaved changes (or discards the edits).
save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_final_preextension_new.dta", replace


*Summary Stat for preextension dataset

* In FINAL_preextension_new, n= 5296
* (The line above was bare text, which Stata tries to run as a command.)

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_final_preextension_new.dta", clear


* statistic(sd) had an empty variable list; it now reports the SD of the same
* variable as the mean.
table ( var ) ( everPE ) (), statistic(mean rural_FORHP_eligible_zip) statistic(sd rural_FORHP_eligible_zip) statistic(frequency) nformat(%5.2f mean sd) sformat((%s) sd) style(table-1) 

*HHR Merge*
*Source file: 
*Date: 03/25/2024
********************************************************************************

* Step 1: in-sample hospitals with name, address, PE flag and bed count.
tempfile tmp
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_final_preextension_new.dta"
keep if insample == 1
keep pn fac_name city_name state_cd st_adr everPE bed_cnt
save `tmp'

* Step 2: rural flag and ZIP code per hospital, keyed on a numeric pn.
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_RURAL_newdef.dta"
destring ID, generate(pn)
keep pn rural_FORHP_eligible_zip zip_cd

* Step 3: combine, and discard hospitals that are not in the in-sample list.
merge 1:1 pn using `tmp' //using = preextension
keep if _merge != 1
save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMS_POS_2018_RURAL_newdef_march31.dta"
export excel using "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/Hospitals_for_GIS_march31.xls", firstrow(variables)


//fix this beds_cnt vs beds_tot thing -- April 1st
* Brings beds_tot in next to bed_cnt so the two bed measures can be compared.
tempfile tmp
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_final_preextension_new.dta"
keep if insample == 1
keep pn beds_tot
save `tmp'

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2_MERGED_insample_reg_HRR.dta"
merge m:1 pn using `tmp'

order beds_tot, after(bed_cnt)

* Which hospitals disagree, and in which direction.
list pn if beds_tot != bed_cnt
levelsof pn if beds_tot != bed_cnt
levelsof pn if beds_tot > bed_cnt
levelsof pn if beds_tot < bed_cnt

* beds_tot stays in the file but is marked as not to be used.
label variable beds_tot "Do not use"
order posbeds, after(sysname)
///conclusion: use bed_cnt
save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2_MERGED_insample_reg_HRR.dta", replace


//merge in HRR info from ArcGIS
use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/Other Data/Aggregate_Points_for_BedCnt.dta", clear
tempfile tmp1
* After these renames hrr holds the HRR number (the merge key) and hrrname
* holds what was previously stored in hrr.
rename hrr hrrname
rename hrrnum hrr
keep hrr hrrname sum_bed_cnt point_count
save `tmp1'

use "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2_MERGED_insample_reg_HRR.dta", clear
* This file is saved with _merge from the beds_tot merge still in it, and
* -merge- aborts when _merge already exists.
capture drop _merge
merge m:1 hrr using `tmp1'
sort _merge pn year
save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2_MERGED_insample_reg_HRR.dta", replace

//fix the HRRs that did not merge over
* For HRRs without aggregate values, the bed total is computed from one row
* per hospital (one == 1) and then entered by hand.
levelsof hrr if sum_bed_cnt == . //10 150 286 458
total bed_cnt if one == 1 & hrr == 10 //1499
replace sum_bed_cnt = 1499 if hrr == 10
replace point_count = 24 if hrr == 10
total bed_cnt if one == 1 & hrr == 150 
replace sum_bed_cnt = 2442 if hrr == 150
replace point_count = 23 if hrr == 150
total bed_cnt if one == 1 & hrr == 286 //only ONE hosp, do not count
total bed_cnt if one == 1 & hrr == 458 //only ONE hosp, do not count
save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2_MERGED_insample_reg_HRR.dta", replace

*Add HRR data and Closure Model*
*Source file: 
*Date: 03/27/2024
********************************************************************************

*Note: Alaska (PN: 20004) has no HRR


// Add HRR data: assign an HRR number by hand to individual hospitals,
// identified by provider number (pn).
// Each -replace- overwrites whatever hrr the hospital currently has.
// Where the original script assigned the same pn twice, only the last
// (effective) assignment is kept and the superseded value is noted.
// NOTE(review): these assignments run after the HRR-level merge on hrr, so
// sum_bed_cnt / point_count are not refreshed for the hospitals changed here
// -- confirm whether that is intended.
replace hrr = 1 if pn == 10084
replace hrr = 1 if pn == 10098
replace hrr = 11 if pn == 30018
replace hrr = 11 if pn == 30019
replace hrr = 15 if pn == 30040
replace hrr = 12 if pn == 30060
replace hrr = 15 if pn == 30080
replace hrr = 12 if pn == 30106
replace hrr = 12 if pn == 30116
replace hrr = 12 if pn == 30138
replace hrr = 12 if pn == 31318
replace hrr = 19 if pn == 40100
replace hrr = 21 if pn == 40138
replace hrr = 21 if pn == 40141
replace hrr = 280 if pn == 50015
replace hrr = 65 if pn == 50095
replace hrr = 82 if pn == 50114
replace hrr = 280 if pn == 50148
replace hrr = 89 if pn == 50331
replace hrr = 31 if pn == 50333
replace hrr = 58 if pn == 50377
replace hrr = 31 if pn == 50433
replace hrr = 80 if pn == 50447
replace hrr = 56 if pn == 50456
replace hrr = 58 if pn == 50497
replace hrr = 62 if pn == 50569
replace hrr = 56 if pn == 50578
replace hrr = 80 if pn == 50583
replace hrr = 56 if pn == 50584
// Progress check: in-sample rows (one == 1) still lacking an HRR.
count if one == 1 & hrr == .
replace hrr = 82 if pn == 50662
replace hrr = 80 if pn == 50698
replace hrr = 85 if pn == 50707
replace hrr = 56 if pn == 50741
replace hrr = 77 if pn == 50783
replace hrr = 103 if pn == 60129
replace hrr = 102 if pn == 60130
replace hrr = 102 if pn == 61344
replace hrr = 111 if pn == 70009
replace hrr = 127 if pn == 100114
replace hrr = 127 if pn == 100172
replace hrr = 127 if pn == 100225
replace hrr = 119 if pn == 100279
replace hrr = 144 if pn == 110020
replace hrr = 144 if pn == 110033
replace hrr = 144 if pn == 110172
replace hrr = 152 if pn == 130062
replace hrr = 161 if pn == 140105
replace hrr = 156 if pn == 140152
replace hrr = 171 if pn == 140205
replace hrr = 183 if pn == 150027
replace hrr = 205 if pn == 150134
replace hrr = 186 if pn == 150147
replace hrr = 179 if pn == 151321
replace hrr = 286 if pn == 170022
replace hrr = 268 if pn == 170180
replace hrr = 200 if pn == 170193
replace hrr = 201 if pn == 171305
replace hrr = 204 if pn == 180007
replace hrr = 218 if pn == 190124
replace hrr = 218 if pn == 190152
replace hrr = 216 if pn == 190182
replace city_name = "METAIRIE" if pn == 190182
replace hrr = 218 if pn == 190185
replace hrr = 217 if pn == 190197
replace hrr = 216 if pn == 190206
replace hrr = 210 if pn == 190249
replace hrr = 218 if pn == 190271
replace hrr = 227 if pn == 220133
replace hrr = 230 if pn == 220153
replace hrr = 227 if pn == 220154
replace hrr = 230 if pn == 220172
replace hrr = 234 if pn == 230119
replace hrr = 259 if pn == 230190
replace hrr = 245 if pn == 230223
replace hrr = 234 if pn == 230293
replace hrr = 234 if pn == 230298
replace hrr = 249 if pn == 231302
replace hrr = 371 if pn == 241324
replace hrr = 257 if pn == 250125
replace hrr = 268 if pn == 260107
replace hrr = 273 if pn == 260159
replace hrr = 268 if pn == 260166
replace hrr = 274 if pn == 270011
replace hrr = 277 if pn == 280123
replace hrr = 279 if pn == 290058
replace hrr = 289 if pn == 310013
replace hrr = 284 if pn == 310020
replace hrr = 284 if pn == 310037
replace hrr = 289 if pn == 310090
replace hrr = 293 if pn == 320019
replace hrr = 293 if pn == 320090
// pn 330230: a mistyped "3-3" (which evaluates to 0) preceded this line; removed.
replace hrr = 303 if pn == 330230
replace hrr = 301 if pn == 330314
replace hrr = 295 if pn == 330339
replace hrr = 303 if pn == 330387
replace hrr = 295 if pn == 331320
// pn 330408: an earlier assignment of 330 was overwritten by this one.
replace hrr = 296 if pn == 330408
replace hrr = 320 if pn == 340072
replace hrr = 318 if pn == 340124
replace hrr = 311 if pn == 340137
replace hrr = 430 if pn == 340177
// pn 341301: an earlier assignment of 251 was overwritten by this one.
replace hrr = 309 if pn == 341301
replace hrr = 315 if pn == 341312
replace hrr = 324 if pn == 350010
replace hrr = 323 if pn == 350014
replace hrr = 329 if pn == 360047
replace hrr = 329 if pn == 360062
replace hrr = 326 if pn == 360100
replace hrr = 327 if pn == 360142
replace hrr = 330 if pn == 360187
replace hrr = 340 if pn == 370060
replace hrr = 339 if pn == 370174
replace hrr = 340 if pn == 370176
replace hrr = 339 if pn == 370200
replace hrr = 339 if pn == 370223
replace hrr = 339 if pn == 371341
replace hrr = 356 if pn == 390024
replace hrr = 357 if pn == 390103
// pn 390166: an earlier assignment of 396 was overwritten by this one.
replace hrr = 357 if pn == 390166
replace hrr = 358 if pn == 390181
replace hrr = 351 if pn == 390246
replace hrr = 356 if pn == 390285
replace hrr = 356 if pn == 390286
replace hrr = 356 if pn == 390331
replace hrr = 321 if pn == 431303
replace hrr = 373 if pn == 440024
replace hrr = 380 if pn == 440026
replace hrr = 380 if pn == 440135
replace hrr = 379 if pn == 440147
replace hrr = 373 if pn == 440162
replace hrr = 379 if pn == 440222
replace hrr = 380 if pn == 440223
replace hrr = 380 if pn == 440224
replace hrr = 380 if pn == 441322
replace hrr = 391 if pn == 450031
replace hrr = 396 if pn == 450047
replace hrr = 406 if pn == 450201
replace hrr = 391 if pn == 450393
replace hrr = 391 if pn == 450758
replace hrr = 393 if pn == 450760
replace hrr = 402 if pn == 450811
replace hrr = 391 if pn == 450849
replace hrr = 406 if pn == 450850
replace hrr = 406 if pn == 450868
replace hrr = 412 if pn == 450879
replace hrr = 423 if pn == 460008
replace hrr = 423 if pn == 460020
replace hrr = 426 if pn == 490073
// pn 490129: an earlier assignment of 426 was overwritten by this one.
// NOTE(review): the neighbouring pn 490073 gets 426 and the following 5001xx
// hospitals get 439, so this line may be a mistyped provider number --
// verify whether 490129 should be 426 and whether another pn is missing.
replace hrr = 439 if pn == 490129
replace hrr = 439 if pn == 500134
replace hrr = 334 if pn == 500140
replace hrr = 439 if pn == 500154
replace hrr = 432 if pn == 510067
replace hrr = 443 if pn == 511302
replace hrr = 451 if pn == 520040
replace hrr = 447 if pn == 520152
replace hrr = 250 if pn == 520173
replace hrr = 458 if pn == 530034
replace hrr = 423 if pn == 530035
replace hrr = 382 if pn == 670003
replace hrr = 386 if pn == 670007
replace hrr = 397 if pn == 670017
replace hrr = 394 if pn == 670045
replace hrr = 391 if pn == 670057
replace hrr = 412 if pn == 670125


// Save the dataset with the hand-assigned HRRs. The file already exists on
// disk (it was written earlier in this do-file), so -replace- is required;
// without it -save- stops with "file already exists".
save "/Users/amandahe/Documents/Spring 2024/Thesis - Both Sem/CMS Full Hospital Dataset/CMSPOS2018_FINAL_DATASET_2_MERGED_insample_reg_HRR.dta", replace
levelsof hrr if closure == 1

// hrr_withPE: 1 if the hospital's HRR is in the hard-coded list below, else 0.
// Rows with missing hrr stay 0, because inlist() matches only listed values.
generate hrr_withPE = 0

replace hrr_withPE = 1 if inlist(hrr, ///
    1, 2, 5, 7, 10, 11, 12, 15, 19, 22, ///
    23, 56, 79, 82, 85, 96, 102, 103, 106, 110, ///
    111, 115, 116, 118, 119, 120, 122, 123, 127, 129, ///
    130, 133, 134, 137, 139, 140, 141, 142, 144, 145, ///
    146, 147, 148, 151, 152, 155, 156, 166, 188, 194, ///
    200, 201, 204, 205, 208, 209, 212, 213, 216, 217, ///
    218, 219, 227, 231, 234, 240, 244, 257, 268, 273, ///
    276, 279, 282, 284, 289, 293, 309, 311, 312, 315, ///
    318, 320, 327, 335, 336, 339, 340, 344, 346, 347, ///
    354, 356, 364, 365, 366, 373, 376, 377, 379, 380, ///
    383, 385, 386, 390, 391, 393, 394, 396, 397, 402, ///
    406, 412, 413, 416, 418, 421, 422, 423, 426, 431, ///
    432, 438, 440, 443, 445, 449, 457)
