
	capture mkdir "$global_outputs"

****************** PREP AUXILIARY FILES **** ADDED BY NONA ON MAY 22 2016 **************

** Taiwan, China: exchange rate and deflator rows added on April 9, 2025 by Nona (data from the WDI team).
** Each section builds six TWN rows (years 2009 and 2020-2024), appends the WDI file currently
** named by the global, saves the result to a tempfile and re-points the global at that tempfile.

	* --- GDP deflator ---
	drop _all
	set obs 6
	gen year = cond(_n == 1, 2009, 2018 + _n)
	gen wbcode = "TWN"
	gen wbcountryname = "Taiwan China"

	* Years without a supplied figure (2024) stay missing
	gen gdp_deflator = .
	replace gdp_deflator =  95.656 if year == 2009
	replace gdp_deflator = 100.256 if year == 2020
	replace gdp_deflator = 102.287 if year == 2021
	replace gdp_deflator = 104.385 if year == 2022
	replace gdp_deflator = 106.998 if year == 2023

	append using "$deflators_WDI_data"
	tempfile deflators_WDI_data
	save `deflators_WDI_data'
	global deflators_WDI_data = "`deflators_WDI_data'"

	* --- Official exchange rate (LCU per USD) ---
	drop _all
	set obs 6
	gen year = cond(_n == 1, 2009, 2018 + _n)
	gen wbcode = "TWN"
	gen CountryName = "Taiwan China"

	* Only 2020-2023 are supplied; 2009 and 2024 stay missing
	gen Official_ER_LCU_per_USD = .
	replace Official_ER_LCU_per_USD = 29.583 if year == 2020
	replace Official_ER_LCU_per_USD = 28.024 if year == 2021
	replace Official_ER_LCU_per_USD = 29.806 if year == 2022
	replace Official_ER_LCU_per_USD = 31.158 if year == 2023

	append using "$exchange_rates_WDI_data"
	tempfile exchange_rates_WDI_data
	save `exchange_rates_WDI_data'
	global exchange_rates_WDI_data = "`exchange_rates_WDI_data'"
********** end of adding Taiwan, China


	* Stratification sector lookup: one row per idstd with its StraSector, saved for later use
	import excel using "$global_inputs_replication\\add stratification.xlsx", sheet("strata info") firstrow clear
		keep idstd StraSector
		destring idstd, replace
	save "$global_outputs\\temp_strasector.dta", replace
	

	
	* Keep a copy of the input workbook next to the outputs, with the date local in the file name.
	* NOTE(review): the local macro date is not defined in the visible part of this file -- confirm it is set upstream.
	copy "$global_inputs_replication\\MAIN input for indicators.xlsx" "$global_outputs\in_MAIN input for indicators_`date'.xlsx", replace
	
	* Load the "standardization" sheet and build fieldwork.dta: survey-level attributes plus
	* the manually supplied deflator / exchange-rate overrides (renamed to *_modified below).
	import excel "$global_inputs_replication\\MAIN input for indicators.xlsx", sheet("standardization") firstrow clear
		keep country fielddate other_survey panel_data methodology fyear n3_l2_year notes deflator_d2 deflator_d2ford2FYminus1 country_abr d2_l1_last_year n3_l2_last_year d2_n3_last_month exchangerated2 exchangerated2ford2FYminus exchangeraten3 exchangeraten3forn3FYminus unitconverter
		order country fielddate other_survey panel_data methodology fyear n3_l2_year notes deflator_d2 deflator_d2ford2FYminus1 d2_l1_last_year n3_l2_last_year country_abr d2_n3_last_month exchangerated2 exchangerated2ford2FYminus exchangeraten3 exchangeraten3forn3FYminus unitconverter
		rename deflator_d2 deflator_d2_modified
		rename deflator_d2ford2FYminus1 deflator_d2_FYmin1_modified		
		rename exchangerated2 exrate_d2_modified
		rename exchangerated2ford2FYminus exrate_d2_FYmin1_modified
		rename exchangeraten3 exrate_n3_modified
		rename exchangeraten3forn3FYminus exrate_n3_FYmin1_modified
		* country_abr is the merge key further down; the script stops here if any row has it blank
		replace country_abr = trim(country_abr)
		assert country_abr != ""
		
		***** add country_proper variable
			* Park the original survey name in countryxx and derive a temporary merge key "country":
			* egen ends() with punct(2) head keeps the text before the first character "2"
			* (presumably this strips a trailing survey year such as 2009 -- confirm for pre-2000 surveys).
			ren country countryxx
			egen country =  ends(countryxx) , punct(2) trim head 

			* Align spellings with the key used in the country lookup file
			replace country = "SouthSudan" if country == "Southsudan"
			replace country = "Antigua And Barbuda" if country =="Antiguaandbarbuda"
			replace country = "Central African Republic" if country =="Centralafricanrepublic"
			replace country = "Serbia&Montenegro" if country =="Montenegro"
			replace country = "Serbia&Montenegro" if country =="Serbia"
			replace country = "Srilanka" if country =="SriLanka"
			replace country = "St. Kitts And Nevis" if country =="Stkittsandnevis"
			replace country = "St. Lucia" if country =="Stlucia"
			replace country = "St. Vincent And The Grenadines" if country =="Stvincentandthegrenadines"
			replace country = "Trinidad&Tobago" if country =="Trinidadandtobago"
			replace country = "Lao Pdr" if country =="Lao PDR"
			replace country = "Newguinea" if country =="Papua New Guinea"

			* Pull country_proper; rows found only in the lookup file are discarded,
			* then the temporary key is dropped and the original survey name restored.
			merge m:1 country using "$global_country_gni_pop",keepusing(country_proper) 
			drop if _m == 2
			drop country _m 
			ren countryxx country

		***** add country_official ccode variable
			* No keepusing() here, so every variable of the lookup file is brought in
			merge m:1 country_abr using "$global_inputs_replication\\country_officialname.dta"
			drop if _m==2
			drop _m
			
		** New classification variable region_WB added Aug.15.2014 Xie **;
			* The region file is keyed on c_abbr, so country_abr is renamed for the merge and renamed back afterwards
			ren country_abr c_abbr
			merge m:1 c_abbr using "$global_country_region_income",keepus(region_WB)
			drop if _m==2
			ren c_abbr country_abr
			drop _m
			
			sort country
	save "$global_outputs\\fieldwork.dta", replace
	
*************** PREPARE AUXILIARY FILE FOR DEFLATION ************************
		* Step 1 (fieldwork data still in memory): build the file of manual overrides.
		* The FY-minus-1 exchange rates are renamed to the plain *_modified names and both
		* reference years are shifted back by one, so each such row acts as an override record
		* for the preceding year pair. These rows are then stacked on top of fieldwork.dta.
		preserve
			keep country_abr d2_l1_last_year n3_l2_last_year exrate_d2_FYmin1_modified exrate_n3_FYmin1_modified
			rename exrate_d2_FYmin1_modified exrate_d2_modified
			rename exrate_n3_FYmin1_modified exrate_n3_modified
			replace d2_l1_last_year = d2_l1_last_year - 1
			replace n3_l2_last_year = n3_l2_last_year - 1
			drop if d2_l1_last_year == . | n3_l2_last_year == .
			duplicates drop
			append using "$global_outputs\\fieldwork.dta"
			* Rows flagged "THIS IS MICRO" in other_survey are excluded from the override file
			drop if other_survey == "THIS IS MICRO"
			save "$global_outputs\\modifications_to_deflators_and_exchange_rates.dta", replace			
		restore
		* Step 2: 2009 deflators for all countries (temp09.dta) and for the USA alone (temp09USA.dta).
		* wbcode_usa is a constant key (=1) used later to attach the USA value to every row.
		preserve
			use "$deflators_WDI_data",clear /* NEW WDI data available Nov.1.2015 Xie, http://data.worldbank.org/data-catalog/world-development-indicators */
			keep if year==2009
			ren gdp_deflator gdp_deflator2009
			save "$global_outputs\\temp09.dta",replace
			keep if wbcode == "USA"
			gen wbcode_usa=1
			ren gdp_deflator2009 gdp_deflatorUSA2009
			save "$global_outputs\\temp09USA.dta",replace
		restore
		* Step 3: the full USA deflator series by year (tempUSA.dta)
		preserve
			use "$deflators_WDI_data",clear /* NEW WDI data available Nov.1.2015 Xie, http://data.worldbank.org/data-catalog/world-development-indicators */
			keep if wbcode == "USA"
			ren gdp_deflator gdp_deflatorUSA
			save "$global_outputs\\tempUSA.dta",replace
		restore

		* Start from the distinct country codes present in the fieldwork data and expand them
		* into a country-by-year panel carrying own and USA deflators plus the official exchange rate.
		keep country_abr
		duplicates drop
		rename country_abr wbcode

		* bring in 2009 deflators
		merge 1:1 wbcode using "$global_outputs\\temp09.dta", keepusing(gdp_deflator2009)
		keep if _m == 1 | _m == 3
		drop _merge
		erase "$global_outputs\\temp09.dta"

		* bring in USA 2009 deflators
		* (constant key wbcode_usa=1 attaches the single USA 2009 value to every row)
		gen wbcode_usa=1
		merge m:1 wbcode_usa using "$global_outputs\\temp09USA.dta", keepusing(gdp_deflatorUSA2009)
		keep if _m == 1 | _m == 3
		drop _merge wbcode_usa
		erase "$global_outputs\\temp09USA.dta"

		* bring in all deflators
		* (1:m: each country row becomes one row per year available in the WDI file)
		merge 1:m wbcode using "$deflators_WDI_data"
		drop if _merge==2
		tab _merge
		drop _merge

		* bring in all USA deflators
		merge m:1 year using "$global_outputs\\tempUSA.dta"
		drop if _merge==2
		tab _merge
		drop _merge

		* bring in exchange rate data
			* The listed countries are temporarily recoded to "EMU" so that they pick up the
			* EMU row of the exchange-rate file; the original code is restored right after the merge.
			* Lithuania and Slovak Republic are recoded only from 2015 and 2009 onwards respectively.
			clonevar wbcode_orig = wbcode  // added by Nona on April 25, 2019 to accommodate Euro Area countries
			replace wbcode = "EMU" if wbcountryname == "Greece" | wbcountryname == "Cyprus" | wbcountryname == "Malta" | wbcountryname == "Montenegro" | wbcountryname == "Italy" | wbcountryname == "Latvia" | ///
			wbcountryname == "Kosovo" | wbcountryname =="Estonia" | (wbcountryname =="Lithuania" & year >=2015) | wbcountryname =="Slovenia" | wbcountryname =="Portugal" | (wbcountryname =="Slovak Republic" & year >=2009) | ///
			wbcountryname == "Luxembourg" | wbcountryname == "Belgium" | wbcountryname == "Netherlands" | wbcountryname == "Ireland" | wbcountryname == "Austria" | wbcountryname == "Finland" | wbcountryname == "France" | /// 
			wbcountryname == "Germany" | wbcountryname == "Spain"
		merge m:1 wbcode year using "$exchange_rates_WDI_data", keepusing(Official_ER_LCU_per_USD) keep(1 3) /* NEW WDI data available Apr.14.2015 Xie, same source as above */
			replace wbcode = wbcode_orig if wbcode_orig != ""
			drop wbcode_orig
		tab _merge
		drop _merge

		* The year-t series just merged become the "d2 last calendar year" series
		rename (year gdp_deflator gdp_deflatorUSA Official_ER_LCU_per_USD) ///
		       (d2_last_clndr_year gdp_defl_d2_last_clndr_year gdp_deflUSA_d2_last_clndr_year exrate_d2_last_clndr_year)

		* For lags 1 to 4 relative to d2_last_clndr_year, attach the same three series again and
		* store them under the tag for that lag: 1 = d2_prev, 2 = n3_last, 3 = n3_prev, 4 = n3_ALT
		local clndr_tags d2_prev n3_last n3_prev n3_ALT
		forvalues lag = 1/4 {
			local clndr_tag : word `lag' of `clndr_tags'
			gen year = d2_last_clndr_year - `lag'

			* bring in all deflators
			merge 1:1 wbcode year using "$deflators_WDI_data"
			drop if _merge==2
			tab _merge
			drop _merge

			* bring in USA deflators by year
			merge m:1 year using "$global_outputs\\tempUSA.dta"
			drop if _merge==2
			tab _merge
			drop _merge

			* bring in exchange rate data
			* Euro Area handling added by Nona on April 25, 2019: the listed countries are
			* temporarily recoded to "EMU" for the merge and restored immediately afterwards
			clonevar wbcode_orig = wbcode
			replace wbcode = "EMU" if ///
				inlist(wbcountryname, "Greece", "Cyprus", "Malta", "Montenegro", "Italy", "Latvia", "Kosovo") | ///
				inlist(wbcountryname, "Estonia", "Slovenia", "Portugal", "Luxembourg", "Belgium", "Netherlands") | ///
				inlist(wbcountryname, "Ireland", "Austria", "Finland", "France", "Germany", "Spain") | ///
				(wbcountryname == "Lithuania" & year >= 2015) | ///
				(wbcountryname == "Slovak Republic" & year >= 2009)
			merge m:1 wbcode year using "$exchange_rates_WDI_data", keepusing(Official_ER_LCU_per_USD) keep(1 3) /* NEW WDI data available Apr.14.2015 Xie, same source as above */
			replace wbcode = wbcode_orig if wbcode_orig != ""
			drop wbcode_orig
			tab _merge
			drop _merge

			* File the freshly merged series under the tag for this lag
			rename year                    `clndr_tag'_clndr_year
			rename gdp_deflator            gdp_defl_`clndr_tag'_clndr_year
			rename gdp_deflatorUSA         gdp_deflUSA_`clndr_tag'_clndr_year
			rename Official_ER_LCU_per_USD exrate_`clndr_tag'_clndr_year
		}
		
*** overwrite from inputfile where appropriate
* Merges the manual overrides (modifications_to_deflators_and_exchange_rates.dta) onto the
* country-year panel, keyed on country_abr and the two reference years, then:
*   - fills deflators from the overrides only where the WDI value is missing;
*   - replaces exchange rates with the override whenever an override is present;
*   - applies hard-coded values for Somalia (SOM).
* Key variables are renamed to the input-file names for the merge and renamed back at the end.
	rename wbcode country_abr
	rename d2_last_clndr_year d2_l1_last_year
	rename n3_last_clndr_year n3_l2_last_year	
	merge 1:m country_abr d2_l1_last_year n3_l2_last_year using "$global_outputs\\modifications_to_deflators_and_exchange_rates.dta", keepusing(unitconverter deflator_d2_modified deflator_d2_FYmin1_modified exrate_d2_modified exrate_d2_FYmin1_modified exrate_n3_modified exrate_n3_FYmin1_modified)
	* Discard using-only rows that carry no override at all.
	* FIX: the last term used to be a bare variable name (no "== ."). Stata treats any nonzero
	* value, missing included, as true, so rows whose only override was a non-zero
	* exrate_n3_FYmin1_modified were dropped as well. The explicit test keeps them.
	drop if _merge == 2 & unitconverter ==. & deflator_d2_modified ==. & deflator_d2_FYmin1_modified ==. & exrate_d2_modified ==. & exrate_d2_FYmin1_modified ==. & exrate_n3_modified ==. & exrate_n3_FYmin1_modified ==.
	* Using-only rows with a gap above 2 years are left to the 3-year-gap section below.
	* A missing n3_l2_last_year also satisfies this condition, because missing compares as larger than 2.
	drop if _merge == 2 & d2_l1_last_year - n3_l2_last_year > 2
	replace gdp_defl_d2_last_clndr_year = deflator_d2_modified if gdp_defl_d2_last_clndr_year ==. & deflator_d2_modified != .
	replace exrate_d2_last_clndr_year = exrate_d2_modified if exrate_d2_modified != .
	replace gdp_defl_d2_prev_clndr_year = deflator_d2_FYmin1_modified if gdp_defl_d2_prev_clndr_year ==. & deflator_d2_FYmin1_modified != .
	replace exrate_d2_prev_clndr_year = exrate_d2_FYmin1_modified if exrate_d2_FYmin1_modified != .
	 
	// the next few lines added by Nona on March 24, 2020, USA deflator as of Nov2019 WDI
	// Somalia: missing deflators are filled with the USA deflators and exchange rates are set to 1
	replace gdp_deflator2009 = gdp_deflatorUSA2009 if country_abr == "SOM" & inlist(d2_l1_last_year,2018,2019,2020) & gdp_deflator2009 == .
	replace gdp_deflUSA_d2_last_clndr_year = 117.439937182492    if country_abr == "SOM" & d2_l1_last_year == 2019 & gdp_deflUSA_d2_last_clndr_year == . 
	replace gdp_deflUSA_d2_last_clndr_year = 120.08856935227     if country_abr == "SOM" & d2_l1_last_year == 2020 & gdp_deflUSA_d2_last_clndr_year == . 
	replace gdp_deflUSA_d2_prev_clndr_year = 117.439937182492    if country_abr == "SOM" & d2_l1_last_year == 2020 & gdp_deflUSA_d2_prev_clndr_year == . 
	
	replace gdp_defl_d2_last_clndr_year = gdp_deflUSA_d2_last_clndr_year if country_abr == "SOM" & inlist(d2_l1_last_year,2018,2019,2020) & gdp_defl_d2_last_clndr_year == .
	replace gdp_defl_d2_prev_clndr_year = gdp_deflUSA_d2_prev_clndr_year if country_abr == "SOM" & inlist(d2_l1_last_year,2018,2019,2020) & gdp_defl_d2_prev_clndr_year == .
	replace gdp_defl_n3_last_clndr_year = gdp_deflUSA_n3_last_clndr_year if country_abr == "SOM" & inlist(d2_l1_last_year,2018,2019,2020) & gdp_defl_n3_last_clndr_year == . 
	replace gdp_defl_n3_prev_clndr_year = gdp_deflUSA_n3_prev_clndr_year if country_abr == "SOM" & inlist(d2_l1_last_year,2018,2019,2020) & gdp_defl_n3_prev_clndr_year == . 
	
	// Only the first of these four is restricted to missing values; the other three overwrite unconditionally
	replace exrate_d2_last_clndr_year = 1 if country_abr == "SOM" & d2_l1_last_year == 2018 & exrate_d2_last_clndr_year == . 
	replace exrate_d2_prev_clndr_year = 1 if country_abr == "SOM" & inlist(d2_l1_last_year,2018,2019)  
	replace exrate_n3_last_clndr_year = 1 if country_abr == "SOM" & inlist(d2_l1_last_year,2018,2019,2020) 
	replace exrate_n3_prev_clndr_year = 1 if country_abr == "SOM" & inlist(d2_l1_last_year,2018,2019,2020) 
	// end of lines added on March 24, 2020
	
	* n3 exchange-rate overrides run after the Somalia block, so an override wins over the value 1 set above.
	* The FY-minus-1 override feeds both the prev and the ALT series.
	replace exrate_n3_last_clndr_year = exrate_n3_modified if exrate_n3_modified != .
	replace exrate_n3_prev_clndr_year = exrate_n3_FYmin1_modified if exrate_n3_FYmin1_modified != .
	replace exrate_n3_ALT_clndr_year = exrate_n3_FYmin1_modified if exrate_n3_FYmin1_modified != .
	drop _merge deflator_d2_modified deflator_d2_FYmin1_modified exrate_d2_modified exrate_d2_FYmin1_modified exrate_n3_modified exrate_n3_FYmin1_modified
	rename country_abr wbcode
	rename d2_l1_last_year d2_last_clndr_year
	rename n3_l2_last_year	n3_last_clndr_year	 
	duplicates drop

*** handle cases with 3 years between fiscal years
* On a preserved copy, the n3 series are shifted by one slot (prev becomes last, ALT becomes prev),
* the manual overrides are merged again on the shifted keys, and the result is saved as temp1.dta.
		preserve	
	        drop n3_last_clndr_year exrate_n3_last_clndr_year gdp_defl_n3_last_clndr_year gdp_deflUSA_n3_last_clndr_year	
		
			rename n3_prev_clndr_year n3_last_clndr_year
			rename exrate_n3_prev_clndr_year exrate_n3_last_clndr_year
			rename n3_ALT_clndr_year n3_prev_clndr_year
			rename exrate_n3_ALT_clndr_year exrate_n3_prev_clndr_year
			rename gdp_defl_n3_prev_clndr_year gdp_defl_n3_last_clndr_year
			rename gdp_deflUSA_n3_prev_clndr_year gdp_deflUSA_n3_last_clndr_year			
			rename gdp_defl_n3_ALT_clndr_year gdp_defl_n3_prev_clndr_year
			rename gdp_deflUSA_n3_ALT_clndr_year gdp_deflUSA_n3_prev_clndr_year
			
			rename wbcode country_abr
			rename d2_last_clndr_year d2_l1_last_year
			rename n3_last_clndr_year n3_l2_last_year	
			* unitconverter is dropped so that the merge below brings it in afresh for the shifted keys
			drop unitconverter
			merge 1:m country_abr d2_l1_last_year n3_l2_last_year using "$global_outputs\\modifications_to_deflators_and_exchange_rates.dta", keepusing(unitconverter deflator_d2_modified deflator_d2_FYmin1_modified exrate_d2_modified exrate_d2_FYmin1_modified exrate_n3_modified exrate_n3_FYmin1_modified)
			* Discard using-only rows that carry no override at all.
			* FIX: the last term used to be a bare variable name (no "== ."). Stata treats any nonzero
			* value, missing included, as true, so rows whose only override was a non-zero
			* exrate_n3_FYmin1_modified were dropped as well. The explicit test keeps them.
			drop if _merge == 2 & unitconverter ==. & deflator_d2_modified ==. & deflator_d2_FYmin1_modified ==. & exrate_d2_modified ==. & exrate_d2_FYmin1_modified ==. & exrate_n3_modified ==. & exrate_n3_FYmin1_modified ==.
			* Only using-only rows with a gap of at least 3 years belong to this section
			drop if _merge == 2 & d2_l1_last_year - n3_l2_last_year < 3
			replace gdp_defl_d2_last_clndr_year = deflator_d2_modified if gdp_defl_d2_last_clndr_year ==. & deflator_d2_modified != .
			replace exrate_d2_last_clndr_year = exrate_d2_modified if exrate_d2_modified != .
			replace gdp_defl_d2_prev_clndr_year = deflator_d2_FYmin1_modified if gdp_defl_d2_prev_clndr_year ==. & deflator_d2_FYmin1_modified != .
			replace exrate_d2_prev_clndr_year = exrate_d2_FYmin1_modified if exrate_d2_FYmin1_modified != .
			replace exrate_n3_last_clndr_year = exrate_n3_modified if exrate_n3_modified != .
			replace exrate_n3_prev_clndr_year = exrate_n3_FYmin1_modified if exrate_n3_FYmin1_modified != .
			drop _merge deflator_d2_modified deflator_d2_FYmin1_modified exrate_d2_modified exrate_d2_FYmin1_modified exrate_n3_modified exrate_n3_FYmin1_modified
			rename country_abr wbcode
			rename d2_l1_last_year d2_last_clndr_year
			rename n3_l2_last_year	n3_last_clndr_year	 
			duplicates drop					
			
			save "$global_outputs\\temp1.dta",replace
		restore

		* For twelve listed countries, save rows with n3_last_clndr_year blanked and all n3 series
		* removed (temp2.dta); missing numeric values are filled with the maximum found within the
		* same wbcode / d2_last_clndr_year group.
		preserve
			* String inlist() takes at most ten arguments, hence two calls
			keep if inlist(wbcode, "DZA", "KHM", "CMR", "CPV", "EGY", "ETH") | ///
			        inlist(wbcode, "LBN", "MYS", "MAR", "SYR", "THA", "PSE")
			drop exrate_n3_last_clndr_year n3_prev_clndr_year exrate_n3_prev_clndr_year n3_ALT_clndr_year exrate_n3_ALT_clndr_year gdp_defl_n3_last_clndr_year gdp_deflUSA_n3_last_clndr_year gdp_defl_n3_prev_clndr_year gdp_deflUSA_n3_prev_clndr_year gdp_defl_n3_ALT_clndr_year gdp_deflUSA_n3_ALT_clndr_year
			replace n3_last_clndr_year = .

			* ds3 (user-written) leaves the list of numeric variables in r(varlist)
			ds3, num
			local numeric_vars `r(varlist)'
			foreach v of local numeric_vars {
				tempvar group_max
				egen `group_max' = max(`v'), by(wbcode d2_last_clndr_year n3_last_clndr_year)
				replace `v' = `group_max' if `v' == .
				drop `group_max'
			}
			drop if wbcountryname == ""
			save "$global_outputs\\temp2.dta",replace
		restore

		* Combine the main panel with the 3-year-gap rows (temp1.dta), drop the ALT helper series,
		* then stack the rows saved in temp2.dta and remove the intermediate files.
		merge 1:1 wbcode d2_last_clndr_year n3_last_clndr_year using "$global_outputs\\temp1.dta"
		erase "$global_outputs\\temp1.dta"
		drop _merge n3_ALT_clndr_year	exrate_n3_ALT_clndr_year	gdp_defl_n3_ALT_clndr_year	gdp_deflUSA_n3_ALT_clndr_year

		append using "$global_outputs\\temp2.dta"
		* NOTE(review): half-second pause before the erase; presumably works around a file lock -- confirm it is still needed
		sleep 500
		erase "$global_outputs\\temp2.dta"
		erase "$global_outputs\\modifications_to_deflators_and_exchange_rates.dta"
		
		** next 7 lines added by Nona on August 10, 2018 to fix WDI issues, the figures are from the WDI immediately prior to the current one (folder O:\FPDEA\Surveys\_Survey Attributes\Latest WDI\2018-08-07)
		** All of these patches apply only where the current value is missing.
		replace gdp_deflator2009 = 97.70125461 if wbcode == "SSD" & gdp_deflator2009 == .
		replace gdp_defl_n3_last_clndr_year = 103.0547538 if wbcode == "SSD" & n3_last_clndr_year == 2010 & gdp_defl_n3_last_clndr_year == .
		replace gdp_defl_n3_prev_clndr_year = gdp_defl_n3_last_clndr_year if wbcode == "SSD" & d2_last_clndr_year ==2013 & gdp_defl_n3_prev_clndr_year == .
		
		replace gdp_deflator2009 = 196.0000840981 if wbcode == "DJI" & gdp_deflator2009 == .
		replace gdp_defl_n3_last_clndr_year = 196.0000840981 if wbcode == "DJI" & n3_last_clndr_year == 2009 & gdp_defl_n3_last_clndr_year == .	
		
		replace gdp_defl_n3_prev_clndr_year = gdp_defl_n3_last_clndr_year if wbcode == "DJI" & d2_last_clndr_year ==2012 & gdp_defl_n3_prev_clndr_year == .
		replace gdp_defl_d2_prev_clndr_year = gdp_defl_d2_last_clndr_year if wbcode == "DJI" & d2_last_clndr_year ==2012 & gdp_defl_d2_prev_clndr_year == .
		
		** added by Nona on September 30, 2019 to supply deflators for EUR
		** (the first three lines fill exchange rates, the last two fill deflators, for MLT and ITA)
		replace exrate_d2_last_clndr_year = 0.904035127950359 if (wbcode == "MLT"|wbcode == "ITA") & exrate_d2_last_clndr_year == . & d2_last_clndr_year == 2019
		replace exrate_d2_prev_clndr_year = 0.904035127950359 if (wbcode == "MLT"|wbcode == "ITA") & exrate_d2_prev_clndr_year == . & d2_last_clndr_year == 2019
		replace exrate_n3_last_clndr_year = 0.904035127950359 if (wbcode == "MLT"|wbcode == "ITA") & exrate_n3_last_clndr_year == . & n3_last_clndr_year == 2017
		replace gdp_defl_d2_last_clndr_year = 116.488668 if (wbcode == "MLT"|wbcode == "ITA") & gdp_defl_d2_last_clndr_year == . & d2_last_clndr_year == 2019
		replace gdp_defl_d2_prev_clndr_year = 116.488668 if (wbcode == "MLT"|wbcode == "ITA") & gdp_defl_d2_prev_clndr_year == . & d2_last_clndr_year == 2019
			
*** transform into base year 2009
		* Each deflator is divided by its own 2009 value (country series by gdp_deflator2009,
		* USA series by gdp_deflatorUSA2009); the raw series are then dropped.
		foreach cat in d2_last_clndr_year d2_prev_clndr_year n3_last_clndr_year n3_prev_clndr_year {
			gen defl_adj_`cat'    = gdp_defl_`cat'/gdp_deflator2009
			gen defl_adj_USA_`cat'= gdp_deflUSA_`cat'/gdp_deflatorUSA2009
			drop gdp_defl_`cat' gdp_deflUSA_`cat'
		}
		drop gdp_deflator2009 gdp_deflatorUSA2009
		
		* Back to the input-file names for the two reference years
		rename d2_last_clndr_year d2_l1_last_year
		rename n3_last_clndr_year n3_l2_last_year
		drop  wbcountryname	

		erase "$global_outputs\\tempUSA.dta"
		save "$global_outputs\\deflation.dta",replace
*************** END OF PREPARING AUXILIARY FILE FOR DEFLATION ***************


** AUXILIARY PARTS FOR ISIC_V4
* Based on Hibret's dofile: "O:\FPDEA\Hibret\Productivity\TFP 2023\ISIC_3.1 to 4 correspondence\do\do_create a crosswalk_HM.do". 
* added on October 9, 2023, by Nona, after consulting with Hibret
	
*===============================================================================
* Create a Crosswalk  
*===============================================================================	
* Assigns each 4-digit ISIC Rev 3.1 code a 2-digit ISIC Rev 4 code in "crosswalk" and records in
* "explanation" which rule produced it:
*   1 = the 3.1 code appears only once; 2 = all its Rev 4 2-digit codes agree;
*   3 = all in-universe Rev 4 2-digit codes agree; 4 = the modal code covers at least half
*   of the rows and is in-universe; 5 = none of the above (crosswalk left missing).
		
	//Input the main file: https://unstats.un.org/unsd/classifications/Econ/tables/ISIC/ISIC31_ISIC4/ISIC31_ISIC4.txt
	
	import delimited using "$global_inputs_replication\\ISIC_ISIC31_ISIC4_ISIC31_ISIC4.txt", varnames(1) clear 

	*import excel using "${file}" , clear firstrow
	
	//Convert both numbers to strings
	ren isic4code ISIC4code
	ren isic31code ISIC31code 
	ren partialisic4  partialISIC4
	ren partialisic31  partialISIC31
	ren detail Detail 
	
	//Three-character codes get a "0" prepended so that every code has four characters
	tostring ISIC31code ISIC4code, force replace	
	replace ISIC31code="0"+ISIC31code if strlen(ISIC31code)==3 
	replace ISIC4code="0"+ISIC4code if strlen(ISIC4code)==3 
	
	//Gen ISIC rev 4, 2 digit
	gen ISIC4_2digit=substr(ISIC4code,1,2)
	
	//Make a numeric copy
	destring ISIC4_2digit, g(ISIC4_2digit_num)
	destring ISIC31code, g(ISIC31_4digit_num)
	
	//Gen a variable if the 2-digit ISIC 4 code is in our universe
	gen in_univ_ISIC4=(inrange(ISIC4_2digit_num,10,33) | inrange(ISIC4_2digit_num,41,43) | inrange(ISIC4_2digit_num,45,47)| inrange(ISIC4_2digit_num,49,53) | inrange(ISIC4_2digit_num,69,75) | inlist(ISIC4_2digit_num,55,56,58,61,62,79,95))
	
	//Gen a variable if the 4-digit ISIC 3.1 code is in our universe 
	//NOTE(review): inlist2 is not a built-in Stata command; presumably a user-written command that creates the indicator named in name() -- confirm it is installed
	inlist2 ISIC31_4digit_num, values(1511,1512,1513,1514,1520,1531,1532,1533,1541,1542,1543,1544,1549,1551,1552,1553,1554,1600,1711,1712,1721,1722,1723,1729,1730,1810,1820,1911,1912,1920,2010,2021,2022,2023,2029,2101,2102,2109,2211,2212,2213,2219,2221,2222,2230,2310,2320,2330,2411,2412,2413,2421,2422,2423,2424,2429,2430,2511,2519,2520,2610,2691,2692,2693,2694,2695,2696,2699,2710,2720,2731,2732,2811,2812,2813,2891,2892,2893,2899,2911,2912,2913,2914,2915,2919,2921,2922,2923,2924,2925,2926,2927,2929,2930,3000,3110,3120,3130,3140,3150,3190,3210,3220,3230,3311,3312,3313,3320,3330,3410,3420,3430,3511,3512,3520,3530,3591,3592,3599,3610,3691,3692,3693,3694,3699,3710,3720,4510,4520,4530,4540,4550,5010,5020,5030,5040,5050,5110,5121,5122,5131,5139,5141,5142,5143,5149,5151,5152,5159,5190,5211,5219,5220,5231,5232,5233,5234,5239,5240,5251,5252,5259,5260,5510,5520,6010,6021,6022,6023,6030,6110,6120,6210,6220,6301,6302,6303,6304,6309,6411,6412,6420,7210,7221,7229,7230,7240,7250,7290) name(in_univ_ISIC31)
	
	//Drop if both are out 
	keep if in_univ_ISIC31==1 | in_univ_ISIC4==1
	
	//Duplicates
	duplicates tag ISIC31code, g(ISIC31_multiple)

	//Explanation
	gen explanation=.
	replace explanation=1 if ISIC31_multiple==0 //when the first duplicates is 0, there is a 1-to-1 match
	
	//Crosswalk (procedure differs by explanation code)
	gen crosswalk=.
	replace crosswalk=ISIC4_2digit_num if explanation==1
	
	//Check to see if the 2-digit code is uniform throughout one ISIC31code 
	//(the group mean equals the group minimum only when every value in the group is the same)
	bys ISIC31code: egen _testmean=mean(ISIC4_2digit_num)
	bys ISIC31code: egen _testmin=min(ISIC4_2digit_num)
	replace explanation=2 if _testmean==_testmin & explanation==. //All four digits 3.1 codes map to the same 2-digit code 
	replace crosswalk=ISIC4_2digit_num if explanation==2
	
	//NOTE(review): tag is computed over the whole sorted file, not by ISIC31code, so it does not restart at the first row of each 3.1 code.
	//tag and unique_count_X are not in the final keep list at the end of this section, so they do not reach the output.
	sort ISIC31code ISIC4_2digit_num
	gen tag = cond(_n == 1 | ISIC4_2digit_num != ISIC4_2digit_num[_n-1], 1, 0)
	egen unique_count_X = total(tag), by(ISIC31code )

	//Cases where all the in-universe 2-digit ISIC Rev 4 are the same (i.e., the only ones that differ are out of the ES universe)
	//The statistics are computed on in-universe rows only and then copied to the other rows of the same 3.1 code
	bys ISIC31code: egen _testmean2=mean(ISIC4_2digit_num) if in_univ_ISIC4==1
	bys ISIC31code: egen _testmin2=min(ISIC4_2digit_num)  if in_univ_ISIC4==1
	bys ISIC31code: egen __testmean2=mean(_testmean2)
	replace _testmean2=__testmean2 if _testmean2==. & __testmean2!=.
	drop __testmean2 
	
	bys ISIC31code: egen __testmin2=mean(_testmin2)
	replace _testmin2=__testmin2 if _testmin2==. & __testmin2!=.
	drop __testmin2 
	
	replace explanation=3 if _testmean2==_testmin2 & explanation==. & _testmean2!=. //All in-universe entries of the 3.1 code map to the same 2-digit code  
	replace crosswalk=_testmean2  if explanation==3

	//Now, all the codes left have multiple ISIC 4 2-digit per ISIC 3.1 4-digit 
	
	//Figure out the mode ISIC 4 2-digit of each ISIC 3.1 4-digit
	bys ISIC31code: egen _testmode=mode(ISIC4_2digit_num) if explanation==. 
	
	//See if there is a predominant modal value 
	bys ISIC31code: egen count_nonperfect_corr=count(_testmode) if !missing(_testmode)
	bys ISIC31code: egen count_nonperfect_corr_inuniv=count(_testmode) if !missing(_testmode) & in_univ_ISIC4==1
	gen _hold=(_testmode==ISIC4_2digit_num) 
	bys ISIC31code: egen freq_of_mode=total(_hold) if !missing(_testmode)
	bys ISIC31code: egen freq_of_mode_inuniv=total(_hold) if !missing(_testmode) & in_univ_ISIC4==1
	gen mode_pct=freq_of_mode/count_nonperfect_corr
	gen mode_pct_inuniv=freq_of_mode_inuniv/count_nonperfect_corr_inuniv
	
	//NOTE(review): this universe test omits inrange(45,47) and code 95, both of which are part of in_univ_ISIC4 above -- confirm the difference is intended
	gen mode_in_univ=(inrange(_testmode,10,33) | inrange(_testmode,41,43) | inrange(_testmode,49,53) | inrange(_testmode,69,75) | inlist(_testmode,55,56,58,61,62,79))
	
	replace explanation=4 if mode_pct>=.5 & mode_in_univ==1
	replace crosswalk=_testmode if explanation==4
	
	*replace explanation=5 if mode_pct_inuniv>=.5 & explanation==. & !missing(mode_pct_inuniv)

	//If the mode is out of universe, take the second mode
	*bys ISIC31code: egen _testmode2=mode(ISIC4_2digit_num) if explanation==. & in_univ_ISIC4==1 & mode_in_univ==0
	
	//Everything not resolved by rules 1-4 is flagged for a manual decision; crosswalk stays missing for these rows
	replace explanation=5 if explanation==. 
	
	label define explanation 1 "Only one ISIC Rev. 3.1 entry" 2 "All ISIC 4 entries are the same" 3 "All in-universe ISIC 4 entries are the same" 4 "Modal ISIC 4 val>=50% and is in-universe" 5 "Manual decision needed", replace 
	label values explanation explanation
	
	//Remove the scratch variables (all names starting with an underscore)
	drop _*
	
	keep ISIC31code ISIC31_4digit_num partialISIC31 ISIC4code partialISIC4 Detail ISIC4_2digit ISIC4_2digit_num in_univ_ISIC4 in_univ_ISIC31 explanation crosswalk
	
*-------------------------------------------------------------------------------
* II.  Manual coding  
*-------------------------------------------------------------------------------
* Hand-assigned ISIC Rev 4 2-digit codes, grouped by target code. Each line only fills
* rows whose crosswalk is still missing, so values set by the automatic rules are untouched.

	replace crosswalk = 11 if ISIC31code == "1551" & missing(crosswalk)
	replace crosswalk = 13 if inlist(ISIC31code, "1721", "1723", "1730") & missing(crosswalk)
	replace crosswalk = 14 if inlist(ISIC31code, "1810", "1820") & missing(crosswalk)
	replace crosswalk = 15 if inlist(ISIC31code, "1912", "1920") & missing(crosswalk)
	replace crosswalk = 16 if inlist(ISIC31code, "2023", "2029") & missing(crosswalk)
	replace crosswalk = 17 if ISIC31code == "2109" & missing(crosswalk)
	replace crosswalk = 18 if ISIC31code == "2221" & missing(crosswalk)
	*replace crosswalk = 19 if ISIC31code=="2330"&crosswalk ==.   
	* 2330 updated to 20 by DF (2011 includes "—enrichment of uranium and thorium ores and production of fuel elements for nuclear reactors")
	replace crosswalk = 20 if inlist(ISIC31code, "2330", "2411", "2429") & missing(crosswalk)
	replace crosswalk = 21 if ISIC31code == "2423" & missing(crosswalk)
	replace crosswalk = 22 if inlist(ISIC31code, "2519", "2520") & missing(crosswalk)
	replace crosswalk = 23 if inlist(ISIC31code, "2610", "2699") & missing(crosswalk)
	replace crosswalk = 25 if inlist(ISIC31code, "2811", "2812", "2892", "2893", "2899") & missing(crosswalk)
	replace crosswalk = 28 if ISIC31code == "2922" & missing(crosswalk)
	replace crosswalk = 27 if inlist(ISIC31code, "3110", "3140", "3190") & missing(crosswalk)
	replace crosswalk = 26 if inlist(ISIC31code, "3210", "3220", "3312", "3320") & missing(crosswalk)
	replace crosswalk = 29 if inlist(ISIC31code, "3420", "3430") & missing(crosswalk)
	replace crosswalk = 30 if inlist(ISIC31code, "3511", "3512", "3520", "3591", "3599") & missing(crosswalk)
	replace crosswalk = 31 if ISIC31code == "3610" & missing(crosswalk)
	replace crosswalk = 32 if inlist(ISIC31code, "3692", "3694", "3699") & missing(crosswalk)
	replace crosswalk = 52 if ISIC31code == "6309" & missing(crosswalk)
	*replace crosswalk = 79 if ISIC31code=="9241"&crosswalk ==.   // double check DF: I think it should be 93 (Rev 4) but that's out of our universe 
	replace crosswalk = 45 if ISIC31code == "5020" & missing(crosswalk)
	replace crosswalk = 95 if ISIC31code == "5260" & missing(crosswalk)
	
*----------------------------------------------------------------------------
* III. Manual override 
*----------------------------------------------------------------------------
* Unlike the manual coding above, these assignments replace whatever crosswalk value a row
* already has. Overridden rows get explanation code 6.

	gen manual_overide = .
	replace manual_overide = 1  if ISIC31code == "0113"
	replace manual_overide = 5  if inlist(ISIC31code, "1010", "1020")
	replace manual_overide = 20 if ISIC31code == "2330"
	replace manual_overide = 25 if inlist(ISIC31code, "2813", "2927")
	replace manual_overide = 26 if inlist(ISIC31code, "2930", "3313", "3330")
	replace manual_overide = 28 if inlist(ISIC31code, "2911", "2914", "2915", "2919", "2921", "2923", "2924", "2925") | inlist(ISIC31code, "2926", "2929", "3000")
	replace manual_overide = 30 if ISIC31code == "3530"
	replace manual_overide = 32 if ISIC31code == "3311"
	replace manual_overide = 95 if ISIC31code == "7250"

	label define explanation 6 "Manual override", add
	replace explanation = 6 if !missing(manual_overide)
	replace crosswalk = manual_overide if !missing(manual_overide)

	* Rows mapped to Rev 4 codes 1 or 5 are removed after the tabulation
	tab crosswalk
	drop if inlist(crosswalk, 1, 5)

	drop manual_overide

	rename crosswalk ISIC4Code_2digit

*	save "$global_outputs\\ISIC31_4_2digit_crosswalk", replace  

*------------------------------------------------------------
* Create m:1  
*------------------------------------------------------------

	*ISIC Rev31 4-digit to ISIC Rev4 2-digit 

	* Collapse to one row per 3.1 code; the 1:1 merge below stops with an error if a code still
	* has more than one distinct (crosswalk, explanation) pair after duplicates are dropped.
	keep ISIC31code ISIC4Code_2digit explanation
	duplicates drop  
	merge  1:1  ISIC31code using  "$global_inputs_replication\\isic_rev_31", keep(1 3)
	drop _m
	* The Rev 4 description file is merged on a string key; a missing crosswalk becomes the string "."
	tostring ISIC4Code_2digit, replace 
	merge m:1 ISIC4Code_2digit  using "$global_inputs_replication\\isic_rev_4_2digit", keep(1 3)
	drop _m  
	sort ISIC31code
	order ISIC31code ISIC31Description ISIC4Code_2digit ISIC4Description

	save "$global_outputs\\ISIC31_4_crosswalk_4digit", replace 
	
*All observations with a valid 4-digit ISIC rev 3.1 code use the crosswalk "ISIC31_4_crosswalk_4digit" saved above.

*Observations lacking a valid 4-digit ISIC rev 4.0 code have been manually reviewed by Mengyi. In support of the manual review process, the lines below generate a crosswalk between 2-digit ISIC rev 3.1 and 2-digit ISIC rev 4.0, named "ISIC31_4_crosswalk_2digit".

*Create a crosswalk 2-digit rev 3.1 to 2-digit rev 4.0  

	g ISIC31code_2digit = substr(ISIC31code, 1,2)
	keep ISIC31code_2digit ISIC4Code_2digit ISIC4Description
	duplicates drop  

	* "." is what tostring produced for rows that never received a crosswalk value
	drop if ISIC4Code_2digit=="."

	merge m:1 ISIC31code_2digit using "$global_inputs_replication\\isic_rev_31_2digit", keep(1 3)

	order ISIC31code_2digit ISIC31Description_2digit ISIC4Code_2digit ISIC4Description

	* Number the Rev 4 codes within each 2-digit 3.1 code and go wide: one row per 3.1 code,
	* with columns ISIC4Code_2digit1, ISIC4Code_2digit2, and so on.
	bys ISIC31code_2digit: g n=_n 
	keep ISIC31code_2digit ISIC4Code_2digit n  
	reshape wide ISIC4Code_2digit, i(ISIC31code_2digit)  j(n)

*	save  "$global_outputs\\ISIC31_4_crosswalk_2digit", replace 
	
/*
	* the code below documents the process that was used to map the remaining establishments
	
*===============================================================================
* Assign two-digit ISIC rev 4.0 codes for unmapped establishments   
*===============================================================================

	us "O:\FPDEA\Surveys\_Survey Aggregation\_Output\New_Comprehensive_June_26_2023.dta", clear 

	tostring d1a2, gen(ISIC31code)
	merge m:1 ISIC31code using  "$global_outputs\\ISIC31_4_crosswalk_4digit" , gen(mapped)  // a total of 13,954 
	drop if mapped ==2 
	label define mapped  1 "Unmapped" 3 "Crosswalk" 
	lab values mapped mapped  
	tab mapped 

	g repair = strpos(d1a1x,"repair")>0&inlist(d1a2,2921,2923,2924,2925)
	tab d1a2 repair 

*----------------------------------------------
* Surveys excluded  
*---------------------------------------------

	cap drop countryx
	gen countryx = country
	replace countryx = subinstr(countryx," ","",.) // remove spaces from country names
	foreach survey in $exclude_surveys {
		drop if countryx == "`survey'"
	}	
	encode countryx, gen(country_num)
	decode country_num, gen(cty)
	drop countryx country_num
	
	g exclude_surveys = inlist(country, "Zimbabwe2011", "Cambodia2013", "Benin2009", "CaboVerde2009", "Centralafricanrepublic2011", "Chad2009", "Congo2009", "Eritrea2009") | ///
	inlist(country, "Fiji2009", "Gabon2009", "Lesotho2009", "Liberia2009", "Micronesia,Fed.Sts.2009", "Niger2009", "Rwanda2011", "Samoa2009") | ///
	inlist(country,"SierraLeone2009", "Timor-Leste2009", "Togo2009", "Tonga2009", "Vanuatu2009", "Bangladesh2007", "Bhutan2009", "Malawi2009" ) | /// 
	inlist(country,"Pakistan2007" ,"Venezuela2006") 

	tab country exclude_surveys
		
	tab mapped exclude_surveys   // a total of 10,560 unmapped observations after excluding  

	drop if exclude_surveys ==1    
		
	g description_missing = d1a1x==""   //  
	g isic_missing = d1a2==. 
		

	g year = substr(country, -4,.)
	tab year description_missing 


	*Description for all sectors is available 

	keep if mapped==1 
	tostring isic_v3_1, gen(ISIC31code_2digit)
	tostring d1a2, gen(ISIC31code_4digit)

	replace ISIC31code_2digit = substr(ISIC31code_4digit,1,2) if ISIC31code_2digit=="."

	*Add mapping 2-digit ISIC rev 31 to 2-digit ISIC rev 4.0 

	merge m:1 ISIC31code_2digit using  "$global_outputs\\ISIC31_4_crosswalk_2digit"
	drop if _m==2

	g mapped_2digit = _m==3 

	drop if description_missing==1

	*Drop the countries where data have been collected using ISIC rev 4.0 
	levelsof country if d1a2_v4!=.
	*drop if inlist(country, "Austria2021",  "Bangladesh2022", "Germany2021", "India2022", "Pakistan2022", "Saudi Arabia2022", "Sierra Leone2023", "Timor-Leste2021")

	keep  idstd country stra_sector sector_MS ISIC31code_2digit d1a1x d1a2 isic_v3_1 d1a2 ISIC4Code_2digit1 ISIC4Code_2digit2 ISIC4Code_2digit3 ISIC4Code_2digit4 ISIC4Code_2digit5 mapped*

	sa "${out}\UnmappedForReview", replace  
	export excel using "${out}\UnmappedForReview", firstrow(variables) replace   // this is location: O:\FPDEA\Hibret\Productivity\TFP 2023\ISIC_3.1 to 4 correspondence\out. the file was shared with Mengyi for manual review  

*/

*===============================================================================
* Import manually entered information for the remaining unmapped establishments 
*===============================================================================

	* Load the manually reviewed list and save idstd, country and the assigned code
	* (as a two-character string) to ISIC31_4_crosswalk_manual.
	import excel using "$global_inputs_replication\\UnmappedListReviewed.xls", firstrow clear

	rename ISIC4Code_2digitsAssigned ISIC4Code_2digit_manual
	keep idstd country ISIC4Code_2digit_manual

	* Tabulate country for the rows where no code was assigned.
	tabulate country if ISIC4Code_2digit_manual == .

	* Convert to string. The "." written for missing values must be blanked BEFORE
	* padding, otherwise the one-character "." would itself be padded to "0.".
	tostring ISIC4Code_2digit_manual, replace
	replace ISIC4Code_2digit_manual = "" if ISIC4Code_2digit_manual == "."
	replace ISIC4Code_2digit_manual = "0" + ISIC4Code_2digit_manual if strlen(ISIC4Code_2digit_manual) == 1

	save "$global_outputs\\ISIC31_4_crosswalk_manual", replace  


***** END OF THE AUXILIARIES PART FOR ISIC_v4



	* Keep four columns of the "standardization" sheet and store them as PanelDetail.dta.
	import excel using "$global_inputs_replication\\MAIN input for indicators.xlsx", sheet("standardization") firstrow clear
	keep country panel_data Panel_Data_Details CountrySpecificTopics
	save "$global_outputs\\PanelDetail.dta", replace
