Update Maddison Data

library(MaddisonData)

Intro

This vignette describes how to download the latest Maddison Project data and create from it four data sets included in the package MaddisonData: MaddisonCountries, MaddisonData, MaddisonSources, and MaddisonYears.

The Wikipedia article on the Maddison Project says, “Development economist Branko Milanović (writing for the World Bank), development economist Morten Jerven, and billionaire philanthropist Bill Gates have identified the Maddison Project, the Penn World Tables, and World Bank/IMF data (the World Development Indicators), as the three main sources of worldwide economic statistics such as GDP data, with the focus of the Maddison Project being on historical data. Economist Paul Krugman has suggested the Maddison Project as a data source for historical debt, growth, and labor output and productivity data.”

Bolt and van Zanden (2020) say that, “The Maddison database on Historical Statistics of the World Economy has probably the widest coverage of data on GDP per capita across countries and over time currently available … . To compare income levels and developments for this period and set of countries, national income estimates are converted … to a common currency using purchasing power parities (PPPs).”

Get the data

The web site for Maddison project data offers both “Angus Maddison’s unaltered final dataset” and the “Latest Maddison Project Release”.

On 2025-08-28 I saw that the “Latest Maddison Project Release” was dated 2024-09-18. I requested that and found that it was available in either Excel or Stata format. I downloaded the Excel format to my current working directory as mpd2023web.xlsx. (On 2025-08-28 I found a 4 MB file by this name dated 2025-06-03. I changed that name to mpd2023web0.xlsx and downloaded ostensibly the same file and got one with 4.9 MB. I doubt that I deleted any of the content of the file dated 2025-06-03, though that’s possible. In any event, I plan to ignore this difference for the moment.)

In 2024, I also downloaded “Maddison Database 2010” and compared it with mpd2020.xlsx. I compared the two for numbers for the former USSR, the UK, and the US. I found that the newer data had many numbers the older data didn’t while the older data had a few numbers absent from the newer data. However, it seemed that I would be wise to ignore the older data.

Find the data file

# Look for Excel files whose names match the mpd2023 pattern, then
# display what was found.
MadXlsx <- path_package2('^mpd2023.*xlsx$')
MadXlsx
#> character(0)
#> attr(,"searched")
#> [1] "/tmp/RtmpAmXTUe/Rbuild10b629ad6145/MaddisonData/vignettes"             
#> [2] "/tmp/RtmpAmXTUe/Rbuild10b629ad6145/MaddisonData/vignettes/extdata"     
#> [3] "/tmp/RtmpAmXTUe/Rbuild10b629ad6145/MaddisonData/vignettes/inst/extdata"
#> [4] "/tmp/RtmpAmXTUe/Rbuild10b629ad6145/MaddisonData/vignettes"             
#> [5] "/tmp/RtmpAmXTUe/Rbuild10b629ad6145/MaddisonData/vignettes/extdata"     
#> [6] "/tmp/RtmpAmXTUe/Rbuild10b629ad6145/MaddisonData/vignettes/inst/extdata"
#> [7] "/tmp/RtmpAmXTUe/Rbuild10b629ad6145/MaddisonData"                       
#> [8] "/tmp/RtmpAmXTUe/Rbuild10b629ad6145/MaddisonData/extdata"               
#> [9] "/tmp/RtmpAmXTUe/Rbuild10b629ad6145/MaddisonData/inst/extdata"
# TRUE when at least one matching file was located.
foundData <- length(MadXlsx) > 0
foundData
#> [1] FALSE

If more than one file was found, pick the most recent one.

# File metadata (size, modification time, ...) for every match.
MadInfo <- file.info(MadXlsx)
# Only announce multiple files when more than one actually matched;
# previously the message was printed even for a single match.
if (length(MadXlsx) > 1) {
  cat('Multiple files found.\n')
}
print(MadInfo)
# Keep the file with the latest modification time.
imax <- which.max(MadInfo$mtime)
Madxlsx <- MadXlsx[imax]

Read the data file

Now read Madxlsx.

# Read the 'Full data' sheet and preview its first two and last rows.
MaddisonData0 <- readxl::read_xlsx(path = Madxlsx, sheet = 'Full data')
head(MaddisonData0, n = 2)
tail(MaddisonData0)
# Read the 'Sources' sheet and give its columns short names.
MaddisonSources0 <- readxl::read_xlsx(path = Madxlsx, sheet = 'Sources')
colnames(MaddisonSources0) <- c('ISO', 'years', 'source')
head(MaddisonSources0)
tail(MaddisonSources0)

Countries and country codes?

How many characters in countrycode?

# Distinct country codes and how many there are.
ctryCds <- unique(MaddisonData0[['countrycode']])
nCds <- length(ctryCds)
# Tabulate the number of characters in each code.  The argument to
# table() is kept as a call so the printed table has no variable header.
nchCode <- nchar(ctryCds)
table(unname(nchCode))

Good: All 3-letter codes.

Let’s concatenate country after countrycode then check to see if there are any ctryCds with multiple country names.

# Distinct "code + country name" strings, sorted (hence ordered by code).
cdCtry <- sort(unique(paste0(MaddisonData0$countrycode,
                             MaddisonData0$country)))
# One string per code is expected; more means a code has several names.
if (length(cdCtry) != nCds) {
  stop('some countrycode(s) have more than one',
       ' country')
}
# Split each string back into its 3-character code and the country name.
cd_Ctry <- data.frame(
  ISO     = substr(cdCtry, 1, 3),
  country = substr(cdCtry, 4, nchar(cdCtry))
)

NOTES:

The variable countrycode in MaddisonData is called ISO in MaddisonSources. We will standardize on ISO for this.
There is a countrycode function in a countrycode package that sounds like it might help translating between country and countrycode. However, a little experimentation exposed “ambiguous” results. Therefore, we will not pursue that further here.

Let’s create MaddisonCountries as a data.frame with columns: ISO, country, and region:

# Index cd_Ctry by country code.  The code column is named ISO;
# cd_Ctry has no countrycode column, so using that name assigned NULL.
rownames(cd_Ctry) <- cd_Ctry$ISO
# Distinct "code + region" strings, sorted (hence ordered by code).
ctryRgn <- sort(with(MaddisonData0, unique(
            paste0(countrycode, region))))
# cbind() below pairs rows by position, so there must be exactly one
# region string per country code.
if(length(ctryRgn) != nrow(cd_Ctry)){
    stop('some countrycode(s) have more than one', 
         ' region')
}
# Characters 4 onward of each string are the region name.
MaddisonCountries <- cbind(cd_Ctry, 
            region = substring(ctryRgn, 4))
rownames(MaddisonCountries) <- MaddisonCountries$ISO
save(MaddisonCountries, file='MaddisonCountries.rdb', 
       compress=TRUE)  
# CANNOT include MaddisonCountries.rda in .Rbuildignore, 
# so change the name.   
# per R Packages, section 9.7  
# As of 2025-09-24 the R Packages book recommends writing 
# using usethis::use_data 
if(FALSE){  
    # Disabled alternative; it must name the object saved in this chunk.
    tryUse <- try(usethis::use_data(MaddisonCountries))
}
# Show the directory the .rdb file was written to.
getwd()

This saves MaddisonCountries in MaddisonCountries.rdb in the working directory. To include that in the package, copy or move that file into the data subdirectory of the package and change the suffix to rda.

We can use subset to translate between country and countrycode or select all countries in selected regions.

# Look up a country name from its ISO code.
subset(MaddisonCountries, subset = ISO == 'GBR', select = country)
# Find countries by part of their name; show the first three columns.
subset(MaddisonCountries, subset = grepl('Yugo', country), select = 1:3)
# Number of countries in each region.
table(MaddisonCountries[['region']])
# What are "Western Offshoots"? 
subset(MaddisonCountries, subset = grepl('Of', region),
       select = c(country, ISO))

Now create MaddisonData with countrycode but without country and region.

First identify all rows of MaddisonData0 with both gdppc and pop NA, so they can be dropped.

MaddisonData

# Flag rows in which both gdppc and pop are missing.
NAs <- with(MaddisonData0, is.na(gdppc) & is.na(pop))
# Drop those rows and keep columns 1 and 4:6.
# NOTE(review): presumably countrycode, year, gdppc and pop -- confirm
# against the layout of the 'Full data' sheet.
str(MaddisonData <- MaddisonData0[!NAs, c(1, 4:6)])
# Rename the first column to ISO.
names(MaddisonData)[1] <- 'ISO' 

save(MaddisonData, file='MaddisonData.rdb', 
       compress=TRUE)  
# CANNOT include MaddisonData.rda in .Rbuildignore, 
# so change the name.   
# per R Packages, section 9.7  
# As of 2025-09-24 the R Packages book recommends writing 
# using usethis::use_data 
if(FALSE){  
    # Disabled alternative; it must name the object saved in this chunk.
    tryUse <- try(usethis::use_data(MaddisonData))
}
# Show the directory the .rdb file was written to.
getwd()

`MaddisonSources`

The first and last rows in MaddisonSources0 give general info for GDP and population for all countries since 2008 and 1990, respectively.

The format is a row giving ISO and country followed by one or more rows with sources for different date ranges followed by a blank row.

Get the first row for each country:

# Rows of the Sources sheet in which all of the first three columns
# are missing.
sourceNA <- which(
  is.na(MaddisonSources0[[1]]) &
    is.na(MaddisonSources0[[2]]) &
    is.na(MaddisonSources0[[3]])
)
str(sourceNA)
# Candidate header rows: row 4, then the row after each fully-missing
# row except the first.
# NOTE(review): row 4 is presumably the first country header -- confirm
# against the sheet.
row1 <- c(4, sourceNA[-1] + 1)
str(row1)
# Preview the first three and last three of those rows.
MaddisonSources0[row1[1:3], ]
MaddisonSources0[tail(row1, n = 3), ]

Sources for each ISO / country?

# Codes from column 1 of each header row.  The last entry of row1 is
# excluded: inside the loop, row1[i + 1] only bounds country i's rows.
ISOsourceNms <- MaddisonSources0[head(row1, -1), 1, drop=TRUE]
nISOsources <- length(ISOsourceNms)
# One list element per code, filled in by the loop below.
MaddisonSources <- vector('list', nISOsources)
names(MaddisonSources) <- ISOsourceNms
# Get year ranges for each country
MaddisonYears <- data.frame(
  ISO      =character(0), 
  yearBegin=integer(0), 
  yearEnd  =integer(0), 
  sourceNum=integer(0)
  )
# Collect the per-country pieces and bind them once after the loop,
# rather than growing MaddisonYears with rbind() on every iteration.
MadYrsList <- vector('list', nISOsources)
# seq_len() runs zero iterations when there are no codes; 1:0 would
# have iterated over c(1, 0).
for(i in seq_len(nISOsources)){
  # Rows after this header, up to two rows before the next row1 entry.
  rowi <- (row1[i]+1):(row1[i+1]-2)
  MadSrci <- MaddisonSources0[rowi, 2:3]
  MaddisonSources[[i]] <- MadSrci
  # Convert the entries of the first kept column (years) with the
  # package helper MadDateRanges, then tag the result with the code.
  MadYrsi0 <- MadDateRanges(MadSrci[, 1, drop=TRUE])
  MadYrsi <- cbind(ISO=ISOsourceNms[i], MadYrsi0)
  MadYrsList[[i]] <- MadYrsi
}
# The empty template stays first so the result is still a data frame
# when there are no pieces to bind.
MaddisonYears <- do.call(rbind, c(list(MaddisonYears), MadYrsList))

# Spot-check a single code: its sources and its year ranges.
MaddisonSources[['EGY']]
MaddisonYears[MaddisonYears[['ISO']] == 'EGY', ]

# Attach the general note on post-2008 gdppc sources as an attribute.
attr(MaddisonSources, 'since2008') <- paste(
  "gdppc since 2008: Total Economy Database (TED) from the",
  "Conference Board for all countries included in TED and UN",
  "national accounts statistics for all others.",
  sep = " ")
# Preview the start of both objects ...
head(MaddisonSources, n = 3)
head(MaddisonYears)

# ... and the end.
tail(MaddisonSources, n = 3)
tail(MaddisonYears)

# Write both objects to the working directory.
save(list = 'MaddisonSources', file = 'MaddisonSources.rdb',
     compress = TRUE)
save(list = 'MaddisonYears', file = 'MaddisonYears.rdb',
     compress = TRUE)

Bibliography

Bolt and Van Zanden (2024), “Maddison style estimates of the evolution of the world economy: A new 2023 update”, Journal of Economic Surveys, 1–41.