Zelnik 11-term moving average to adjust for digit preference.

Zelnik's method is used to adjust distributions by year of age for digit preference.

zelnik(Value, p, Age)

Arguments

Value	numeric. A vector of demographic counts in single age groups.
p	integer. Perturbation vector used, 1 or 2. Default 1.
Age	integer. Vector of lower bound of age classes.

Value

A vector with the adjusted values.

Details

Single year age groups are assumed. If q=1, age x is affected by ages x-10 through x+10. If q=2, age x is affected by ages x-15 through x+15. A vector is returned of the same length as Value, but the boundary values are imputed with NA. If q=1, ages 0-9 and the final 10 ages are NA. If q=2, ages 0-14 and the final 15 ages are returned as NA. Note results do not sum to the total from the same age-range of Value because smoothing draws from ages outside that range.

References

Gray A (1987). “The Missing Ages: Adjusting for Digit Preferences.” Asian and Pacific Population Forum, 1(2), 11--22.

Examples

# data from gray1987missingages, Table 2, page 21: Aplication of Q1 and
# Q2 linear operators, Bangladesh Census, 1 March 1974-Males.
Pop <- c(941307,1041335,1237034,1411359,1383853,1541942,1321576,1285877,1563448,886705,
      1623998,562924,1485173,543216,771219,903496,686431,370007,942999,250820,
      1023667,200131,688640,222011,281738,1239965,288363,263326,483143,78635,
      1349886,68438,415127,101596,100758,1392434,178633,126351,286520,50836,
      1331036,48995,251153,58393,54995,1033812,68792,72766,175943,28254,
      1038747,32894,136179,37667,38230,596049,52602,36493,74106,16759,
      790643,20596,70109,18044,19891,357491,15253,17489,31057,8481,
      429816,7951,35583,8612,6589,454645)
Age  <-0:75
z1 <- zelnik(Pop,1,Age)
z2 <- zelnik(Pop,2,Age)
if (FALSE) {
plot(Age,Pop,type='l',col = gray(.4))
lines(as.integer(names(z1)),z1,col = "blue",lwd=2)
lines(as.integer(names(z2)),z2,col = "red",lwd = 2)
}
# \dontshow{
# here some units tests:
# a function required to make original table work
perturb <- function(x, pert = 0, pos = 45){
  x[pos] <- x[pos] + pert
  x
}
 p1_answer <- c(rep(NA,10),1151172,1077888,982103,891354,
     828173,762583,718528,649206,579004,535521,552769,572789,
     543715,504766,456185,484627,516545,487585,458525,437117,
     442480,456052,440706,415651,400515,403565,409668,402329,
     391982,392801,362520,327180,322889,318215,316334,296278,
     276958,275057,272350,279354,244466,209374,214178,210162,
     208499,191717,173916,173882,172936,176417,158124,138487,
     141009,142779,145479,rep(NA,11))

 p2_answer <- c(rep(NA,15),769826,711733,663995,623069,
     584797,552352,530538,515578,505073,497808,489977,479740,
     469326,460446,454699,449240,439744,429856,422980,419200,
     414487,403537,389871,378716,370274,361289,348796,334849,
     321901,310028,298308,286242,274796,263983,253143,242464,
     232007,221636,211372,200807,191057,183573,176971,170539,164776,
      rep(NA,16))
 if (FALSE) {
   # using census as given in table. Symmetry makes TR suspect this
   # is due to a single value. Trial and error ensued.
 plot(Age,zelnik(Pop,1,Age) - p1_answer,xlim=c(32,54),type='l')
     # change value of age 44 by removing 40 individuals. Suspected
   # error in original table (or in value used by original authors to carry out calcs)
   # 54995 now is 54955, and we get a match.
 lines(Age,round(zelnik(perturb(Pop,-40,45),1,Age)) - p1_answer, col = "blue")

 # that the same adjustment works for p2 makes me suspect that the error
 # is indeed in the value given for the census count at age 44.
 plot(Age,round(zelnik(Pop,2,Age)) - p2_answer,xlim=c(30,55),type='l')
 lines(Age,round(zelnik(perturb(Pop,-40, 45), 2, Age)) - p2_answer, col = "blue")
 }
 # we use this for the de facto unit test:

d1 <- round(zelnik(perturb(Pop,-40,45),1,Age)) - p1_answer
d2 <- round(zelnik(perturb(Pop,-40,45),2,Age)) - p2_answer
stopifnot(all(d1[!is.na(d1)] == 0))
stopifnot(all(d2[!is.na(d2)] == 0))
# }