function data = menarchedata(makeplot)
%MENARCHEDATA Simulate binned menarche data with smoking and bmi covariates.
%   DATA = MENARCHEDATA(MAKEPLOT) simulates individual subjects whose
%   per-age-bin counts of Y == 1 agree with the Warsaw study, adds simulated
%   smoking and bmi covariates, bins age and bmi, and tabulates the
%   proportion of subjects with Y == 1 at every design point.
%
%   Input
%     makeplot - optional flag. When true, scatter plots of the simulated
%                (unbinned) age and bmi values are drawn in figure 1,
%                coloured once by smoke and once by Y. Defaults to false
%                when omitted or empty.
%
%   Output
%     data - matrix with columns [age, smoke, bmi, props] and one row per
%            combination of age bin, bmi level (3) and smoking status (2).
%            props is forced to lie strictly between 0 and 1.
%
%   The result is random: rand, randperm, binornd and randn are drawn from
%   the global stream, in that order. Uses binornd and regress (Statistics
%   and Machine Learning Toolbox).

% Allow the function to be called without arguments.
if nargin < 1 || isempty(makeplot)
    makeplot = false;
end

% Representative age of each bin, bin sizes, and the number of subjects
% with Y == 1 in each bin.
meanage = [9.21, 10.21, 10.58, 10.83, 11.08, 11.33, 11.58, ...
    11.83, 12.08, 12.33, 12.58, 12.83, 13.08, 13.33, 13.58, 13.83, 14.08, ...
    14.33, 14.58, 14.83, 15.08, 15.33, 15.58, 15.83];
binsizes = [376, 200, 93, 120, 90, 88, 105, 111, 100, 93, 100, 108, ...
    99, 106, 105, 117, 98, 97, 120, 102, 122, 111, 94, 114];
y = [0, 0, 0, 2, 2, 5, 10, 17, 16, 29, 39, 51, 47, 67, 81, 88, 79, ...
    90, 113, 95, 117, 107, 92, 112];

nAges = numel(meanage);
nSubjects = sum(binsizes);

% Bin i covers the interval (bounds(i), bounds(i+1)).
bounds = [9.00, meanage];
% offsets(i) is the number of subjects that precede bin i.
offsets = [0, cumsum(binsizes)];

% Simulate individual ages (uniform within each bin, sorted) and responses
% (exactly y(i) randomly chosen subjects of bin i get Y = 1).
age = zeros(nSubjects,1);
Y = zeros(nSubjects,1);
for i = 1:numel(binsizes)
    indices = (offsets(i)+1):offsets(i+1);
    age(indices) ...
        = sort(bounds(i) + rand(binsizes(i),1)*(bounds(i+1)-bounds(i)));
    P = offsets(i) + randperm(binsizes(i), y(i));
    Y(P) = 1;
end

% Smoking indicator: success probability grows linearly with age.
smoke = binornd(1,(age-9)/20);

% bmi: linear in Y and age plus standard normal noise. The coefficients
% come from regressing the bmi values in column 1 of bmidata on an
% intercept (column 2), a 0/1 indicator (column 3) and age (column 4).
bmidata = [[18.9; 21.3; 21.6; 22.3; 17.7; 18.9; 19.6; 19.5], ...
   ones(8,1), [1;1;1;1;0;0;0;0], [11.5; 12.5; 13.5; 14.5; 11.5; 12.5; 13.5; 14.5]];
B = regress(bmidata(:,1), bmidata(:,2:4));
bmi = B(1) + B(2)*Y + B(3)*age + randn(nSubjects,1);

% Now bin these: every age is moved up to the representative age of its bin.
xage = age;
xbmi = bmi;
xsmoke = smoke;
xage(xage<meanage(1)) = meanage(1);
for i = 2:nAges
    a = meanage(i-1);
    b = meanage(i);
    xage(xage>a & xage<b) = b;
end

% Three bmi levels: the (rounded) means of the lower quarter, the middle
% half and the upper quarter of the simulated bmi values.
m1 = quantile(xbmi,.25);
m3 = quantile(xbmi, .75);
mean1 = round(mean(xbmi(xbmi<m1)),2);
mean2 = round(mean(xbmi(xbmi>m1 & xbmi<m3)),2);
mean3 = round(mean(xbmi(xbmi>m3)),2);

% Snap each bmi to one of the three levels. The order of these statements
% matters: each step relies on the values overwritten by the previous one.
xbmi(abs(xbmi-mean1)<min(abs(xbmi-mean2),abs(xbmi-mean3))) = mean1;
xbmi(xbmi>mean1 & abs(xbmi-mean2)<abs(xbmi-mean3)) = mean2;
xbmi(xbmi>mean2) = mean3;

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Design space: rows of X are all (age, smoke, bmi) combinations. Age
% varies slowest (6 consecutive rows per age), then bmi level (2 rows
% each), and smoke alternates 1, 0.
AGE = kron(meanage(:), ones(6,1));
BMI = repmat([mean1;mean1;mean2;mean2;mean3;mean3], nAges, 1);
SMOKE = repmat([1;0], 3*nAges, 1);

X = [AGE, SMOKE, BMI];
N = size(X,1);

% Proportion of subjects with Y == 1 at each design point.
props = zeros(N,1);
for k = 1:N
    inAgeBin = (xage==X(k,1));
    inCovCell = (xsmoke==X(k,2)) & (xbmi==X(k,3));
    props(k) = mean(Y(inAgeBin & inCovCell));
    if isnan(props(k))
        % Empty cell: fall back to the proportion in the whole age bin.
        props(k) = mean(Y(inAgeBin));
    end
    if (props(k) == 0)
        % NOTE(review): this substitutes the share of all subjects that
        % fall in this smoke/bmi cell, not a proportion of Y. Kept as in
        % the original; confirm that this is intended.
        props(k) = mean(inCovCell);
    end
end
% Keep every proportion strictly inside (0,1).
props(props==0) = min(props(props>0));
props(props==1) = max(props(props<1));

data = [X props];
if (makeplot)
    figure(1)
    % Left panel: simulated age against bmi, coloured by smoking status.
    subplot(1,2,1)
    hold on
    scatter(age(smoke==1), bmi(smoke==1), 'red');
    scatter(age(smoke==0), bmi(smoke==0), 'blue');
    legend('smoke = 1','smoke = 0', 'Location', 'northwest')
    xlabel('age')
    ylabel('bmi')
    hold off

    % Right panel: the same points, coloured by the response Y.
    subplot(1,2,2)
    hold on
    scatter(age(Y==1), bmi(Y==1), 'red');
    scatter(age(Y==0), bmi(Y==0), 'blue');
    legend('Y = 1','Y = 0', 'Location', 'northwest')
    xlabel('age')
    ylabel('bmi')
    hold off

end

