-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path0_swed_massage.html
executable file
·1052 lines (985 loc) · 85 KB
/
0_swed_massage.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<!-- Character encoding: declared exactly once, as the first child of <head>.
     The legacy http-equiv Content-Type declaration was dropped because a
     document must not carry both forms of encoding declaration. -->
<meta charset="utf-8">
<meta name="generator" content="pandoc" />
<title>Modern Sweden Data Wrangling</title>
<!-- Script and stylesheet order is kept as generated: jQuery is loaded before
     bootstrap.min.js, followed by the shim scripts and the tabset helper. -->
<script src="library/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="library/bootstrap-3.3.5/css/paper.min.css" rel="stylesheet" />
<script src="library/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="library/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="library/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="library/navigation-1.1/tabsets.js"></script>
<!-- Keep whitespace inside <code> exactly as written. -->
<style type="text/css">code{white-space: pre;}</style>
<!-- Styles for the highlighted source-code listings (div/table/td.sourceCode)
     and the per-token colour classes applied to spans inside <code>. -->
<style type="text/css">
div.sourceCode { overflow-x: auto; }
table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
margin: 0; padding: 0; vertical-align: baseline; border: none; }
table.sourceCode { width: 100%; line-height: 100%; }
td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
td.sourceCode { padding-left: 5px; }
code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
code > span.dt { color: #902000; } /* DataType */
code > span.dv { color: #40a070; } /* DecVal */
code > span.bn { color: #40a070; } /* BaseN */
code > span.fl { color: #40a070; } /* Float */
code > span.ch { color: #4070a0; } /* Char */
code > span.st { color: #4070a0; } /* String */
code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
code > span.ot { color: #007020; } /* Other */
code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
code > span.fu { color: #06287e; } /* Function */
code > span.er { color: #ff0000; font-weight: bold; } /* Error */
code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
code > span.cn { color: #880000; } /* Constant */
code > span.sc { color: #4070a0; } /* SpecialChar */
code > span.vs { color: #4070a0; } /* VerbatimString */
code > span.ss { color: #bb6688; } /* SpecialString */
code > span.im { } /* Import */
code > span.va { color: #19177c; } /* Variable */
code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code > span.op { color: #666666; } /* Operator */
code > span.bu { } /* BuiltIn */
code > span.ex { } /* Extension */
code > span.pp { color: #bc7a00; } /* Preprocessor */
code > span.at { color: #7d9029; } /* Attribute */
code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
</style>
<style type="text/css">
/* <pre> blocks without a class attribute (the printed output listings in this
   report) get a plain white background. */
pre:not([class]) {
  background-color: white;
}
</style>
<style type="text/css">
/* Page layout: cap the content column at 940px and centre it horizontally. */
.main-container {
  max-width: 940px;
  margin-left: auto;
  margin-right: auto;
}
/* Inline code inherits the surrounding text colour on a faint grey tint. */
code {
  color: inherit;
  background-color: rgba(0, 0, 0, 0.04);
}
/* Images scale down to the width of their container, keeping aspect ratio. */
img {
  max-width:100%;
  height: auto;
}
</style>
<script src="library/auto_tab_first_section.js"></script>
</head>
<body>
<div style="width:100%;height:200px;background-image:url('library/header/swed.jpg');background-size:cover;"></div>
<div class="container-fluid main-container"><div class="row"><div class="col-md-4"><a href="index.html"><i class="glyphicon glyphicon-th-list"></i> Back to index</a></div></div></div>
<div class="container-fluid main-container">
<div id="swedish-contemporary-data-paternal-age---fitness" class="section level1 tab-content">
<h1>Swedish contemporary data Paternal age -> Fitness</h1>
<div id="description-of-data" class="section level2">
<h2>Description of data</h2>
<div id="the-swedish-multi-generation-register" class="section level3">
<h3>The Swedish Multi-Generation Register</h3>
<p>The Swedish Multi-Generation Register was established in 1960 and includes index persons born since 1932 that were alive in 1960. The register also includes the parents of the index persons. The parents are included in the register even if they were not born after 1932 or alive in 1960. The coverage of parents of individuals born in Sweden is approximately 98 % for fathers and 100 % for mothers around 1960 (see graph below).</p>
</div>
<div id="left-truncation" class="section level3">
<h3>Left truncation</h3>
<p>Left truncation refers to data that is retrospectively limited, resulting in exclusion of study subjects. The truncation is dependent on birth strata. Still, matching cases and controls on birth year does not automatically take care of problems due to left truncation. In these studies, the data might be truncated regarding parental ages. Individuals born before 1932, the earliest birth year covered for index persons in the Multi-Generation Register, are excluded due to left truncation of parental age data. However, the Multi-Generation Register includes parental information of individuals born since 1932 and ages of their parents are not extensively subjected to left truncation. In order to collate a sample consisting of 3 generations, parents have to be born after 1932. As a result, older parents are more likely to be excluded than younger parents. These problems may be addressed with several approaches in sensitivity analyses.</p>
</div>
<div id="left-censoring" class="section level3">
<h3>Left censoring</h3>
<p>When an event occurs before the individual comes under observation, this is called left censoring. Studies on paternal age and number of offspring might be subjected to left censoring regarding number of children because children born before 1932 are censored. Different sensitivity analyses can be used to examine potential problems with left censoring.</p>
<div id="difference-between-left-truncation-and-left-censoring" class="section level4">
<h4>Difference between left truncation and left censoring</h4>
<ul>
<li>If a man has all his children before 1932 he will be excluded due to left truncation.</li>
<li>If a man has some children before 1932 and others after 1932 this man will be included as well as children born after 1932 who were alive in 1960. However, the children born before 1932 will be excluded due to left censoring.</li>
</ul>
</div>
</div>
<div id="right-censoring" class="section level3">
<h3>Right censoring</h3>
<p>When an event occurs after the observation time end-point, this is called right censoring. Studies on paternal age in association with fertility might be subjected to right censoring regarding number of children because all children born after December 31st, 2009 are censored. However, problems with censoring can be adjusted for by using Cox regression or conditional logistic regression.</p>
</div>
<div id="other-issues" class="section level3">
<h3>Other issues</h3>
<ul>
<li>Lopnrs are unique for each individual. However, an individual can occur several times, either as fathers, grandfathers or children.</li>
<li>Some observations consist of two generations. Others have complete data for three generations.</li>
<li>Educational level classification: 1. Elementary school, &lt; 9 y; 2. Compulsory school, 9 y; 3. Upper secondary, &lt; 3 y; 4. Upper secondary, 3 y; 5. Postsecondary, &lt; 3 y; 6. University graduate, ≥ 3 y; 7. University postgraduate (PhD)</li>
<li>Lopnr=id number (mor=mother, far=father, mormor=maternal grandmother, morfar=maternal grandfather, farmor= paternal grandmother, farfar= paternal grandfather )</li>
<li>Kon = sex</li>
</ul>
</div>
</div>
<div id="wrangling" class="section level2">
<h2>Wrangling</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">source</span>(<span class="st">"0__helpers.R"</span>)
<span class="kw">library</span>(zoo)
<span class="co"># bsub -q fat -W 48:00 -n 1 Rscript -e "setwd('/usr/users/rarslan/updated_data/'); filebase = '0_swed_massage'; knitr::knit(input = paste0(filebase,'.Rmd'), output = paste0(filebase,'.md'));cat(readLines(paste0(filebase,'.md')), sep = '\n')"</span>
opts_chunk$<span class="kw">set</span>(<span class="dt">cache=</span>F,<span class="dt">cache.lazy=</span>F,<span class="dt">tidy=</span><span class="ot">FALSE</span>,<span class="dt">autodep=</span><span class="ot">TRUE</span>,<span class="dt">dev=</span><span class="kw">c</span>(<span class="st">'png'</span>,<span class="st">'pdf'</span>),<span class="dt">fig.width=</span><span class="dv">12</span>,<span class="dt">fig.height=</span><span class="fl">7.5</span>,<span class="dt">out.width=</span><span class="st">'1440px'</span>,<span class="dt">out.height=</span><span class="st">'900px'</span>)</code></pre></div>
<div id="calculating-mating-success" class="section level3">
<h3>Calculating mating success</h3>
<p>Info about marriage is called CIVIL. You can be listed as G (married), OG (never married), S (divorced) or Ä (widow/widower). There is also a variable called CIVILANTAR which describes how many years you have had the same CIVIL. This info is updated annually and we have information from 1990-2009.</p>
<p>I have created a file where I just extracted all data about individual marriage status (including all women and men). We actually have info dating back to 1960 for another data source called FOB (folkbokföringsregistret). This data was collected in 1960, 1970, 1975, 1980, 1985 and 1990. FOB does not include CIVILANTAR (number of years with the same CIVIL status).</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># system.time({</span>
<span class="co"># swed_civil60_85 = read_sas("~/Downloads/civil_1960_1985.sas7bdat")</span>
<span class="co"># swed_civil60_85 = data.table(swed_civil60_85)</span>
<span class="co"># })</span>
<span class="co"># system.time({</span>
<span class="co"># swed_civil90_09 = read_sas("~/Downloads/civil_1990_2009.sas7bdat")</span>
<span class="co"># swed_civil90_09 = data.table(swed_civil90_09)</span>
<span class="co"># })</span>
<span class="co"># setkey(swed_civil60_85, LOPNR, year)</span>
<span class="co"># setkey(swed_civil90_09, LOPNR, year)</span>
<span class="co"># </span>
<span class="co"># # swed_civil60_85 = head(swed_civil60_85, 1e5)</span>
<span class="co"># # swed_civil90_09 = head(swed_civil90_09, 1e5)</span>
<span class="co"># </span>
<span class="co"># swed_civil = rbind(swed_civil60_85,swed_civil90_09)</span>
<span class="co"># setkey(swed_civil, LOPNR, year) # sort by person, then year</span>
<span class="co"># save(swed_civil,file="data/swed_civil.rdata")</span>
<span class="co"># rm(swed_civil60_85, swed_civil90_09) # free memory</span>
<span class="kw">load</span>(<span class="st">"data/swed_civil.rdata"</span>)
<span class="co"># tail(swed_civil, 100)</span>
<span class="co"># swed_civil[LOPNR == 9862, ]</span>
<span class="co"># swed_civil[LOPNR == 7, ]</span>
<span class="co"># swed_civil[LOPNR==8602, ]</span>
<span class="kw">crosstabs</span>(swed_civil$kod)</code></pre></div>
<pre><code>## swed_civil$kod
## 0G Ä EP G OG RP S SP
## 1 13243746 1177 81992711 69182406 38685 19884023 7090</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># buggy patterns: some people go back to being "never married". This is logically impossible and seems to be due to some data error where unmarried people are catalogued as divorced in the early years</span>
swed_civil[, kod_fixed :<span class="er">=</span><span class="st"> </span><span class="kw">ifelse</span>(kod==<span class="st">"OG"</span>,<span class="st">"OG"</span>,<span class="ot">NA_character_</span>)] <span class="co"># we set up a variable with only the OG codes, everything else is missing</span>
swed_civil[, kod_fixed :<span class="er">=</span><span class="st"> </span><span class="kw">ave</span>(kod_fixed, LOPNR, <span class="dt">FUN =</span> function(kod_fixed) { <span class="kw">na.locf</span>(kod_fixed, <span class="dt">na.rm=</span>F, <span class="dt">fromLast =</span> T) })] <span class="co"># we carry the OGs backward, so that everybody is always unmarried until the most recent time they were counted as unmarried (implicitly assuming that recent data is more reliable)</span>
swed_civil[<span class="kw">is.na</span>(kod_fixed), kod_fixed :<span class="er">=</span><span class="st"> </span>kod] <span class="co"># for all years that weren't followed by a "never married status", we take the old codes</span>
swed_civil[, status_end :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>] <span class="co"># find the points in the time series where the civil status changes</span>
swed_civil[, status_end :<span class="er">=</span><span class="st"> </span><span class="kw">lead</span>(kod_fixed) !=<span class="st"> </span>kod_fixed] <span class="co"># whenever the code changes, obviously (divorces, spousal death, marriages)</span>
swed_civil[, status_end :<span class="er">=</span><span class="st"> </span><span class="kw">ifelse</span>(<span class="kw">c</span>(!<span class="kw">is.na</span>(<span class="kw">diff</span>(CIVILANTAR)) &<span class="st"> </span><span class="kw">diff</span>(CIVILANTAR) <<span class="st"> </span><span class="kw">diff</span>(year), F), <span class="ot">TRUE</span>, status_end)] <span class="co"># whenever the number of years with the same status changes (could be divorced and remarried within less than the year difference). Should theoretically encompass all the above cases, but we don't trust this.</span>
swed_civil[, status_end :<span class="er">=</span><span class="st"> </span><span class="kw">ifelse</span>(<span class="kw">lead</span>(LOPNR) ==<span class="st"> </span>LOPNR, status_end, T)] <span class="co"># whenever the person changes, that is a status end too, it's basically their "last seen as"</span>
swed_civil[<span class="kw">nrow</span>(swed_civil), status_end :<span class="er">=</span><span class="st"> </span><span class="ot">TRUE</span>] <span class="co"># last row</span>
<span class="co"># now we try to count marriages with all available information</span>
swed_civil[, marriage_count :<span class="er">=</span><span class="st"> </span><span class="kw">ifelse</span>(kod_fixed ==<span class="st"> "G"</span>, <span class="dv">1</span>, <span class="dv">0</span>)] <span class="co"># if you marry, you gain one, OG = constant (for the sake of comparability and because legislation related to this is rather recent, we also treat registered partnerships as 0s)</span>
#####################
### special cases ###
#####################
<span class="co"># first row for each person: if someone starts out as widow(er)/divorcee, then count as 1</span>
swed_civil[!<span class="kw">duplicated</span>(LOPNR) &<span class="st"> </span>kod_fixed %in%<span class="st"> </span><span class="kw">c</span>(<span class="st">"S"</span>,<span class="st">"Ä"</span>), marriage_count :<span class="er">=</span><span class="st"> </span><span class="dv">1</span>]
<span class="co"># if someone goes from being OG straight to being divorced or widowed (esp in the early data, when assessments is five-yearly)</span>
swed_civil[, marriage_count :<span class="er">=</span><span class="st"> </span><span class="kw">ifelse</span>(
<span class="kw">ave</span>(kod_fixed, LOPNR, <span class="dt">FUN =</span> function(kod_fixed) { !<span class="kw">is.na</span>(<span class="kw">lag</span>(kod_fixed)) &<span class="st"> </span><span class="kw">lag</span>(kod_fixed) ==<span class="st">"OG"</span> &<span class="st"> </span>kod_fixed %in%<span class="st"> </span><span class="kw">c</span>(<span class="st">"S"</span>,<span class="st">"Ä"</span>) }),
<span class="dv">1</span>, <span class="co"># is S/Ä right after OG? count as 1, not -1</span>
marriage_count)] <span class="co"># other wise keep value</span>
<span class="co"># now we count "cumulative" spouses (number of spouses had in whole life in given year)</span>
swed_civil[, cumul_spouses :<span class="er">=</span><span class="st"> </span><span class="ot">NA_real_</span>]
swed_civil[status_end ==<span class="st"> </span><span class="ot">TRUE</span>, cumul_spouses :<span class="er">=</span><span class="st"> </span><span class="kw">ave</span>(marriage_count,LOPNR, <span class="dt">FUN =</span> cumsum)] ## we only use the times when the status ends
<span class="kw">crosstabs</span>(swed_civil[<span class="kw">lead</span>(LOPNR) !=<span class="st"> </span>LOPNR, cumul_spouses]) <span class="co"># personal maxima</span></code></pre></div>
<pre><code>## swed_civil[lead(LOPNR) != LOPNR, cumul_spouses]
## 0 1 2 3 4 5 6 7 8
## 4462254 6944642 1107224 108534 7255 486 58 6 4
## 9
## 1</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed_civil_s =<span class="st"> </span>swed_civil %>%
<span class="kw">rename</span>(<span class="dt">idIndividu =</span> LOPNR) %>%<span class="st"> </span><span class="co"># now make one row per person</span>
<span class="st"> </span><span class="kw">group_by</span>(idIndividu) %>%<span class="st"> </span>
<span class="st"> </span><span class="kw">summarise</span>(
<span class="dt">marriage_codes =</span> <span class="kw">paste</span>(<span class="kw">sort</span>(<span class="kw">unique</span>(kod_fixed)), <span class="dt">collapse=</span><span class="st">","</span>),
<span class="dt">ever_married_narrow =</span> <span class="kw">ifelse</span>(<span class="kw">any</span>(kod_fixed ==<span class="st"> 'G'</span>), <span class="dv">1</span>, <span class="dv">0</span>), <span class="co"># this is the simplest (fewest errors?) way to determine if someone was ever married</span>
<span class="dt">ever_married =</span> <span class="kw">ifelse</span>(<span class="kw">any</span>(kod_fixed %in%<span class="st"> </span><span class="kw">c</span>(<span class="st">'G'</span>,<span class="st">'S'</span>,<span class="st">'Ä'</span>)), <span class="dv">1</span>, <span class="dv">0</span>), <span class="co"># this is the inclusive way to determine if someone was ever married, includes people for whom we only know they were divorced. only slight discrepancy with the above after error correction</span>
<span class="dt">years_married =</span> <span class="kw">sum</span>(CIVILANTAR[ kod_fixed ==<span class="st"> "G"</span> &<span class="st"> </span>status_end ==<span class="st"> </span>T]), <span class="co"># for years married we take a more conservative approach and don't put so much effort into reconstructing unobserved marriages</span>
<span class="dt">spouses =</span> <span class="kw">max</span>(cumul_spouses,<span class="dt">na.rm=</span>T), <span class="co"># for each person we take lifetime number of spouses</span>
<span class="dt">times_divorced =</span> <span class="kw">sum</span>(kod_fixed ==<span class="st"> "S"</span> &<span class="st"> </span>status_end ==<span class="st"> </span>T), <span class="co"># count S</span>
<span class="dt">year_of_first_marriage =</span> <span class="kw">ifelse</span>(<span class="kw">any</span>(marriage_count ==<span class="st"> </span><span class="dv">1</span>), <span class="kw">min</span>(year[marriage_count ==<span class="st"> </span><span class="dv">1</span>], <span class="dt">na.rm=</span>T), <span class="ot">NA_real_</span>), <span class="co"># won't work for everyone, we're not using years_married to count back</span>
<span class="dt">times_widowed =</span> <span class="kw">sum</span>(kod_fixed ==<span class="st"> "Ä"</span> &<span class="st"> </span>status_end ==<span class="st"> </span>T), <span class="co"># count Ä</span>
<span class="dt">years_unmarried =</span> <span class="kw">sum</span>(CIVILANTAR[ kod_fixed ==<span class="st"> "OG"</span> &<span class="st"> </span>status_end ==<span class="st"> </span>T])
) %>%<span class="st"> </span><span class="kw">data.table</span>()
swed_civil_s$ever_divorced =<span class="st"> </span><span class="kw">ifelse</span>(swed_civil_s$times_divorced ><span class="st"> </span><span class="dv">1</span>, <span class="dv">1</span>,<span class="dv">0</span>)
<span class="co"># qplot(years_married, data=swed_civil_s)</span>
<span class="co"># qplot(years_married, data=swed_civil_s) + xlim(1,NA)</span>
<span class="co"># qplot(ever_married, data=swed_civil_s)</span>
<span class="co"># qplot(years_married,years_unmarried, geom = 'jitter', data=swed_civil_s)</span>
<span class="kw">rm</span>(swed_civil) <span class="co"># free memory</span>
<span class="kw">crosstabs</span>(~<span class="st"> </span>years_married +<span class="st"> </span>ever_married, <span class="dt">data=</span>swed_civil_s)</code></pre></div>
<pre><code>## ever_married
## years_married 0 1
## 0 4044284 931033
## 1 0 124615
## 2 0 126998
## 3 0 124607
## 4 0 119152
## 5 0 113206
## 6 0 103153
## 7 0 98574
## 8 0 92754
## 9 0 95272
## 10 0 85548
## 11 0 78329
## 12 0 76730
## 13 0 76345
## 14 0 74645
## 15 0 74236
## 16 0 73153
## 17 0 75402
## 18 0 72494
## 19 0 75613
## 20 0 146811
## 21 0 71191
## 22 0 66565
## 23 0 62447
## 24 0 62338
## 25 0 60659
## 26 0 58970
## 27 0 58562
## 28 0 58223
## 29 0 59036
## 30 0 58535
## 31 0 57761
## 32 0 59905
## 33 0 64158
## 34 0 66150
## 35 0 67932
## 36 0 62780
## 37 0 63975
## 38 0 66159
## 39 0 71019
## 40 0 76885
## 41 0 81136
## 42 0 86306
## 43 0 89134
## 44 0 89034
## 45 0 87608
## 46 0 83593
## 47 0 84392
## 48 0 83845
## 49 0 82298
## 50 0 80882
## 51 0 80379
## 52 0 78960
## 53 0 75088
## 54 0 73158
## 55 0 68787
## 56 0 63761
## 57 0 57827
## 58 0 52369
## 59 0 45741
## 60 0 39196
## 61 0 33060
## 62 0 27362
## 63 0 21284
## 64 0 16323
## 65 0 11383
## 66 0 7973
## 67 0 5362
## 68 0 3403
## 69 0 2165
## 70 0 1262
## 71 0 772
## 72 0 425
## 73 0 242
## 74 0 156
## 75 0 76
## 76 0 72
## 77 0 51
## 78 0 25
## 79 0 32
## 80 0 15
## 81 0 8
## 82 0 18
## 83 0 7
## 84 0 11
## 85 0 11
## 86 0 8
## 87 0 7
## 88 0 5
## 89 0 2
## 90 0 6
## 91 0 2
## 92 0 7
## 93 0 3
## 94 0 3
## 95 0 5
## 96 0 4
## 97 0 6
## 98 0 3
## 99 0 4
## 100 0 3
## 101 0 1
## 102 0 1
## 103 0 2
## 104 0 4
## 105 0 4
## 106 0 3
## 107 0 8
## 108 0 1
## 109 0 2
## 112 0 3
## 113 0 1
## 114 0 2
## 115 0 1
## 116 0 2
## 117 0 1
## 119 0 3
## 120 0 2
## 121 0 2
## 122 0 2
## 123 0 2
## 128 0 1
## <NA> 0 2859153</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">crosstabs</span>(~<span class="st"> </span>years_married +<span class="st"> </span>spouses, <span class="dt">data=</span>swed_civil_s)</code></pre></div>
<pre><code>## spouses
## years_married 0 1 2 3 4 5 6
## 0 4462254 511697 1328 37 1 0 0
## 1 0 121773 2737 99 6 0 0
## 2 0 122576 4238 169 14 1 0
## 3 0 118532 5775 279 18 2 0
## 4 0 111640 7059 406 44 1 2
## 5 0 104733 7934 493 42 4 0
## 6 0 94095 8400 604 50 2 2
## 7 0 89120 8698 674 71 10 1
## 8 0 83234 8734 702 73 9 2
## 9 0 85814 8618 760 71 8 0
## 10 0 75543 9146 774 75 8 2
## 11 0 69042 8466 731 79 10 1
## 12 0 67362 8523 775 60 9 1
## 13 0 67446 8163 669 56 8 2
## 14 0 65780 8162 641 56 4 2
## 15 0 65717 7860 604 47 8 0
## 16 0 64911 7597 584 51 6 4
## 17 0 67471 7367 530 31 2 1
## 18 0 64749 7193 514 36 2 0
## 19 0 67908 7243 422 38 1 1
## 20 0 138316 8044 416 34 0 1
## 21 0 63546 7230 395 17 3 0
## 22 0 58721 7450 371 22 0 1
## 23 0 54541 7483 406 16 1 0
## 24 0 53448 8462 402 24 2 0
## 25 0 51262 8936 435 22 3 0
## 26 0 49134 9369 450 17 0 0
## 27 0 48666 9417 451 26 2 0
## 28 0 47830 9837 530 26 0 0
## 29 0 47737 10717 559 22 0 1
## 30 0 46902 11047 565 21 0 0
## 31 0 46771 10352 612 26 0 0
## 32 0 49413 9811 654 27 0 0
## 33 0 54023 9413 696 25 1 0
## 34 0 55026 10375 722 26 1 0
## 35 0 56866 10291 749 23 3 0
## 36 0 52225 9658 877 20 0 0
## 37 0 53569 9527 860 19 0 0
## 38 0 56069 9205 860 23 2 0
## 39 0 28793 41485 726 15 0 0
## 40 0 26328 49888 661 8 0 0
## 41 0 28443 52180 505 8 0 0
## 42 0 31047 54821 427 11 0 0
## 43 0 33427 55365 339 3 0 0
## 44 0 36188 52603 243 0 0 0
## 45 0 38940 48504 163 1 0 0
## 46 0 40889 42613 88 2 1 0
## 47 0 44364 39980 48 0 0 0
## 48 0 47262 36576 7 0 0 0
## 49 0 76196 6096 6 0 0 0
## 50 0 80656 221 5 0 0 0
## 51 0 80207 168 4 0 0 0
## 52 0 78799 160 0 1 0 0
## 53 0 74956 128 3 1 0 0
## 54 0 73038 118 2 0 0 0
## 55 0 68679 105 3 0 0 0
## 56 0 63694 63 4 0 0 0
## 57 0 57757 68 2 0 0 0
## 58 0 52296 70 3 0 0 0
## 59 0 45689 50 2 0 0 0
## 60 0 39150 44 2 0 0 0
## 61 0 33022 38 0 0 0 0
## 62 0 27336 26 0 0 0 0
## 63 0 21250 31 3 0 0 0
## 64 0 16292 29 2 0 0 0
## 65 0 11368 14 1 0 0 0
## 66 0 7958 10 5 0 0 0
## 67 0 5346 15 1 0 0 0
## 68 0 3389 12 2 0 0 0
## 69 0 2149 15 1 0 0 0
## 70 0 1249 10 3 0 0 0
## 71 0 762 9 1 0 0 0
## 72 0 415 10 0 0 0 0
## 73 0 236 3 3 0 0 0
## 74 0 144 10 2 0 0 0
## 75 0 68 8 0 0 0 0
## 76 0 65 7 0 0 0 0
## 77 0 42 7 2 0 0 0
## 78 0 20 4 1 0 0 0
## 79 0 26 4 2 0 0 0
## 80 0 12 2 1 0 0 0
## 81 0 5 2 1 0 0 0
## 82 0 6 11 1 0 0 0
## 83 0 6 1 0 0 0 0
## 84 0 3 8 0 0 0 0
## 85 0 2 7 2 0 0 0
## 86 0 4 4 0 0 0 0
## 87 0 1 6 0 0 0 0
## 88 0 0 2 3 0 0 0
## 89 0 2 0 0 0 0 0
## 90 0 3 3 0 0 0 0
## 91 0 0 2 0 0 0 0
## 92 0 2 5 0 0 0 0
## 93 0 0 3 0 0 0 0
## 94 0 0 3 0 0 0 0
## 95 0 0 5 0 0 0 0
## 96 0 0 4 0 0 0 0
## 97 0 0 6 0 0 0 0
## 98 0 0 3 0 0 0 0
## 99 0 0 4 0 0 0 0
## 100 0 0 3 0 0 0 0
## 101 0 0 1 0 0 0 0
## 102 0 0 1 0 0 0 0
## 103 0 0 2 0 0 0 0
## 104 0 0 4 0 0 0 0
## 105 0 0 4 0 0 0 0
## 106 0 0 3 0 0 0 0
## 107 0 0 8 0 0 0 0
## 108 0 0 1 0 0 0 0
## 109 0 0 2 0 0 0 0
## 112 0 0 3 0 0 0 0
## 113 0 0 1 0 0 0 0
## 114 0 0 2 0 0 0 0
## 115 0 0 1 0 0 0 0
## 116 0 0 2 0 0 0 0
## 117 0 0 1 0 0 0 0
## 119 0 0 2 0 0 1 0
## 120 0 0 2 0 0 0 0
## 121 0 0 2 0 0 0 0
## 122 0 0 2 0 0 0 0
## 123 0 0 2 0 0 0 0
## 128 0 0 1 0 0 0 0
## <NA> 0 2463454 305660 83777 5850 371 34
## spouses
## years_married 7 8 9
## 0 0 0 0
## 1 0 0 0
## 2 0 0 0
## 3 1 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## 7 0 0 0
## 8 0 0 0
## 9 0 1 0
## 10 0 0 0
## 11 0 0 0
## 12 0 0 0
## 13 0 1 0
## 14 0 0 0
## 15 0 0 0
## 16 0 0 0
## 17 0 0 0
## 18 0 0 0
## 19 0 0 0
## 20 0 0 0
## 21 0 0 0
## 22 0 0 0
## 23 0 0 0
## 24 0 0 0
## 25 1 0 0
## 26 0 0 0
## 27 0 0 0
## 28 0 0 0
## 29 0 0 0
## 30 0 0 0
## 31 0 0 0
## 32 0 0 0
## 33 0 0 0
## 34 0 0 0
## 35 0 0 0
## 36 0 0 0
## 37 0 0 0
## 38 0 0 0
## 39 0 0 0
## 40 0 0 0
## 41 0 0 0
## 42 0 0 0
## 43 0 0 0
## 44 0 0 0
## 45 0 0 0
## 46 0 0 0
## 47 0 0 0
## 48 0 0 0
## 49 0 0 0
## 50 0 0 0
## 51 0 0 0
## 52 0 0 0
## 53 0 0 0
## 54 0 0 0
## 55 0 0 0
## 56 0 0 0
## 57 0 0 0
## 58 0 0 0
## 59 0 0 0
## 60 0 0 0
## 61 0 0 0
## 62 0 0 0
## 63 0 0 0
## 64 0 0 0
## 65 0 0 0
## 66 0 0 0
## 67 0 0 0
## 68 0 0 0
## 69 0 0 0
## 70 0 0 0
## 71 0 0 0
## 72 0 0 0
## 73 0 0 0
## 74 0 0 0
## 75 0 0 0
## 76 0 0 0
## 77 0 0 0
## 78 0 0 0
## 79 0 0 0
## 80 0 0 0
## 81 0 0 0
## 82 0 0 0
## 83 0 0 0
## 84 0 0 0
## 85 0 0 0
## 86 0 0 0
## 87 0 0 0
## 88 0 0 0
## 89 0 0 0
## 90 0 0 0
## 91 0 0 0
## 92 0 0 0
## 93 0 0 0
## 94 0 0 0
## 95 0 0 0
## 96 0 0 0
## 97 0 0 0
## 98 0 0 0
## 99 0 0 0
## 100 0 0 0
## 101 0 0 0
## 102 0 0 0
## 103 0 0 0
## 104 0 0 0
## 105 0 0 0
## 106 0 0 0
## 107 0 0 0
## 108 0 0 0
## 109 0 0 0
## 112 0 0 0
## 113 0 0 0
## 114 0 0 0
## 115 0 0 0
## 116 0 0 0
## 117 0 0 0
## 119 0 0 0
## 120 0 0 0
## 121 0 0 0
## 122 0 0 0
## 123 0 0 0
## 128 0 0 0
## <NA> 4 2 1</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">crosstabs</span>(~<span class="st"> </span>spouses +<span class="st"> </span>ever_married, <span class="dt">data=</span>swed_civil_s)</code></pre></div>
<pre><code>## ever_married
## spouses 0 1
## 0 4044284 417970
## 1 0 6944643
## 2 0 1107224
## 3 0 108534
## 4 0 7255
## 5 0 486
## 6 0 58
## 7 0 6
## 8 0 4
## 9 0 1</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">props</span>(~<span class="st"> </span>ever_married, swed_civil_s)</code></pre></div>
<pre><code>## ever_married
## 0 1
## 0.3202 0.6798</code></pre>
</div>
<div id="transforming-data" class="section level3">
<h3>Transforming data</h3>
<!--
  R chunk: loads data/swed2.rdata, assigns 29 column names to swed by position,
  converts swed to a data.table, keeps the first row per idIndividu, left-merges
  swed_civil_s on idIndividu (all.x = T), removes swed_civil_s, derives
  age_at_first_marriage as year_of_first_marriage minus byear, and prints the
  sorted proportions of marriage_codes.
  NOTE(review): the names are assigned positionally and the grandparent order
  differs between the id*, education.* and byear.*/dyear.* groups; confirm it
  matches the column order of the loaded object.
  NOTE(review): dupes is only referenced by commented-out code in this chunk.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># swed = read.csv("/Volumes/Elements/swed.csv")</span>
<span class="co"># save(swed, file = "data/swed2.rdata")</span>
<span class="kw">load</span>(<span class="dt">file=</span><span class="st">"data/swed2.rdata"</span>)
<span class="co"># load(file="data/swed.rdata")</span>
<span class="co"># char_vars = sapply(swed,is.character) | sapply(swed, is.factor)</span>
<span class="co"># swed[,char_vars] = plyr::colwise(function(x) { </span>
<span class="co"># type.convert(as.character(x),as.is=TRUE)</span>
<span class="co"># })(swed[,char_vars])</span>
<span class="kw">names</span>(swed) =<span class="st"> </span><span class="kw">c</span>(<span class="st">"idIndividu"</span>, <span class="st">"idPere"</span>, <span class="st">"idMere"</span>, <span class="st">"idPaternalGrandfather"</span>, <span class="st">"idPaternalGrandmother"</span>, <span class="st">"idMaternalGrandfather"</span>, <span class="st">"idMaternalGrandmother"</span>, <span class="st">"sex"</span>, <span class="st">"education"</span>, <span class="st">"education_Father"</span>, <span class="st">"education_Mother"</span>, <span class="st">"education.MaternalGrandmother"</span>, <span class="st">"education.MaternalGrandfather"</span>, <span class="st">"education.PaternalGrandfather"</span>, <span class="st">"education.PaternalGrandmother"</span>, <span class="st">"byear"</span>, <span class="st">"byear.Father"</span>, <span class="st">"byear.Mother"</span>, <span class="st">"byear.PaternalGrandfather"</span>, <span class="st">"byear.MaternalGrandfather"</span>, <span class="st">"byear.PaternalGrandmother"</span>, <span class="st">"byear.MaternalGrandmother"</span>, <span class="st">"dyear"</span>, <span class="st">"dyear.Father"</span>, <span class="st">"dyear.Mother"</span>, <span class="st">"dyear.PaternalGrandfather"</span>, <span class="st">"dyear.MaternalGrandfather"</span>, <span class="st">"dyear.PaternalGrandmother"</span>, <span class="st">"dyear.MaternalGrandmother"</span>)
swed =<span class="st"> </span><span class="kw">as.data.table</span>(swed)
<span class="co"># swed = head(swed, 1e6)</span>
dupes =<span class="st"> </span>swed$idIndividu[<span class="kw">duplicated</span>(swed$idIndividu)]
<span class="co"># swed[LOPNR %in% dupes, ]</span>
swed =<span class="st"> </span>swed[!<span class="kw">duplicated</span>(swed$idIndividu),]
swed =<span class="st"> </span><span class="kw">merge</span>(swed, swed_civil_s, <span class="dt">by =</span> <span class="st">"idIndividu"</span>, <span class="dt">all.x =</span> T)
<span class="kw">rm</span>(swed_civil_s)
swed[, age_at_first_marriage :<span class="er">=</span><span class="st"> </span>year_of_first_marriage -<span class="st"> </span>byear]
<span class="co"># qplot( year_of_first_marriage-byear, years_unmarried,data=swed, geom = 'jitter', alpha = I(0.3)) + facet_wrap(~ byear)</span>
<span class="co"># qplot(byear, times_divorced, data = swed, stat = 'summary', fun.data = 'mean_sdl')</span>
<span class="co"># qplot(byear, times_widowed, data = swed, stat = 'summary', fun.data = 'mean_sdl')</span>
<span class="co"># qplot(byear, times_divorced, data = swed, stat = 'summary', fun.data = 'mean_sdl')</span>
<span class="kw">sort</span>(<span class="kw">props</span>(~<span class="st"> </span>swed$marriage_codes))</code></pre></div>
<pre><code>## swed$marriage_codes
## Ä,EP,G,OG,RP,S Ä,EP,G,RP,S Ä,EP,OG,S Ä,G,RP,S Ä,OG,RP
## 1.219e-07 1.219e-07 1.219e-07 1.219e-07 1.219e-07
## EP,G,OG EP,S OG,S,SP Ä,G,OG,RP,SP EP,G,OG,RP,S
## 1.219e-07 1.219e-07 1.219e-07 2.438e-07 2.438e-07
## EP,G,OG,RP,SP EP,G,OG,S EP,OG,RP,SP RP Ä,EP,OG,RP
## 2.438e-07 2.438e-07 2.438e-07 2.438e-07 3.658e-07
## Ä,G,OG,RP Ä,EP,G,OG,RP EP,G,RP,S RP,S Ä,EP,OG
## 4.877e-07 7.315e-07 8.535e-07 8.535e-07 9.754e-07
## OG,RP,S,SP G,OG,S,SP G,OG,SP EP,OG G,RP,S,SP
## 1.097e-06 1.219e-06 1.707e-06 1.829e-06 2.317e-06
## OG,SP OG,RP,S EP,G,OG,RP EP,OG,RP G,RP,S
## 2.560e-06 2.682e-06 5.121e-06 5.243e-06 9.998e-06
## Ä,OG,S G,OG,RP,S,SP Ä G,OG,RP,SP G,OG,RP,S
## 1.146e-05 1.402e-05 2.499e-05 3.572e-05 4.218e-05
## OG,RP,SP G,OG,RP Ä,S OG,RP Ä,OG
## 7.425e-05 1.196e-04 1.264e-04 3.138e-04 4.022e-04
## Ä,G,OG,S S Ä,G,OG OG,S Ä,G
## 5.116e-04 4.196e-03 4.804e-03 5.114e-03 5.699e-03
## Ä,G,S G G,OG,S G,S G,OG
## 8.315e-03 1.695e-02 7.141e-02 8.610e-02 2.035e-01
## <NA> OG
## 2.176e-01 3.746e-01</code></pre>
<!-- R chunk: prints the sorted proportions of ever_married in the merged swed table; NA appears as its own category in the output below. -->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sort</span>(<span class="kw">props</span>(~<span class="st"> </span>swed$ever_married))</code></pre></div>
<pre><code>## swed$ever_married
## <NA> 0 1
## 0.2176 0.3750 0.4074</code></pre>
<!-- R chunk: same proportions as the previous chunk, restricted to rows with byear below 1960. -->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">sort</span>(<span class="kw">props</span>(~<span class="st"> </span>swed[swed$byear <<span class="st"> </span><span class="dv">1960</span>,]$ever_married))</code></pre></div>
<pre><code>## swed[swed$byear < 1960, ]$ever_married
## <NA> 0 1
## 0.009808 0.180113 0.810079</code></pre>
<!--
  R chunk: sets ever_married to 0 (by reference) for every row where it is NA,
  then calls miss_frac(swed).
  NOTE(review): miss_frac is defined outside this section; what fraction it
  reports is not visible here.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed[<span class="kw">is.na</span>(ever_married), ever_married :<span class="er">=</span><span class="st"> </span><span class="dv">0</span>] <span class="co"># have verified that those missing from the civil file did not get far</span>
<span class="kw">miss_frac</span>(swed)</code></pre></div>
<pre><code>## [1] 0</code></pre>
<!-- R chunk: tabulates swed$sex (codes 1 and 2 in the output below) before it is recoded into the male indicator. -->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># make male var.</span>
<span class="kw">crosstabs</span>(swed$sex)</code></pre></div>
<pre><code>## swed$sex
## 1 2
## 4206572 3995396</code></pre>
<!--
  R chunk: creates male via Recode (sex 1 becomes 1, sex 2 becomes 0, anything
  else NA) and then drops the sex column with a data.frame-style NULL assignment.
  The alloc.col warning printed below is emitted at this point.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed[, male :<span class="er">=</span><span class="st"> </span><span class="kw">Recode</span>(sex,<span class="st">"'1'=1;'2'=0;else=NA"</span>)]
swed$sex =<span class="st"> </span><span class="ot">NULL</span></code></pre></div>
<pre><code>## Warning in alloc.col(x): Attempt to reduce allocation from 105 to 104
## ignored. Can only increase allocation via shallow copy.</code></pre>
<!--
  R chunk: derives age (dyear minus byear), age_at_least, and the survival
  indicators survive1y / survive5y / surviveR, then cross-tabulates survive1y
  by byear.
  Review fix: negative ages are now blanked BEFORE age is copied into
  age_at_least. Previously the copy ran first, so records with a death year
  before the birth year kept a negative age_at_least and were scored as 0 on
  every survival indicator.
  NOTE(review): the printed table below was produced by the earlier ordering
  and must be regenerated.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># crosstabs(swed$dyear)</span>
swed[, age :<span class="er">=</span><span class="st"> </span>dyear-byear ]
<span class="co"># qplot(swed$age)</span>
<span class="co"># library(mgcv)</span>
<span class="co"># qplot(data=swed,dyear,ifelse(age < 1, 0, 1)) + geom_smooth()</span>
<span class="co"># qplot(data=swed,dyear,age) + geom_smooth()</span>
<span class="co"># we believe that those who don't have a death date by 2009 and who were born after 1962, were alive by 2009. this probably incorrectly treats some out-migrants.</span>
swed[byear>=<span class="dv">1962</span> &<span class="st"> </span><span class="kw">is.na</span>(dyear) &<span class="st"> </span>byear <<span class="st"> </span><span class="dv">2009</span>, age_at_least :<span class="er">=</span><span class="st"> </span><span class="kw">as.integer</span>(<span class="dv">2009</span>)-byear]
<span class="co"># blank impossible negative ages first, so they are not propagated into age_at_least</span>
swed[age <<span class="st"> </span><span class="dv">0</span>, age :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
swed[!<span class="kw">is.na</span>(age), age_at_least :<span class="er">=</span><span class="st"> </span>age]
swed[, survive1y :<span class="er">=</span><span class="st"> </span><span class="kw">ifelse</span>(age_at_least <=<span class="st"> </span><span class="dv">1</span>, <span class="dv">0</span>, <span class="dv">1</span>) ] <span class="co"># we are conservative in our survival estimation and would estimate 1;11 years as dead in the first year in the worst case (because we only have birth years, not months) </span>
swed[, survive5y :<span class="er">=</span><span class="st"> </span><span class="kw">ifelse</span>(age_at_least <=<span class="st"> </span><span class="dv">5</span>, <span class="dv">0</span>, <span class="dv">1</span>) ]
swed[,surviveR :<span class="er">=</span><span class="st"> </span><span class="kw">ifelse</span>(age_at_least <=<span class="st"> </span><span class="dv">15</span>, <span class="dv">0</span>, <span class="dv">1</span>)]
<span class="kw">crosstabs</span>(~<span class="st"> </span>survive1y +<span class="st"> </span>byear, <span class="dt">data =</span> swed)</code></pre></div>
<pre><code>## byear
## survive1y 1932 1933 1934 1935 1936 1937 1938 1939 1940
## 0 0 0 0 0 0 0 0 0 0
## 1 7741 13121 14622 14447 14020 13313 12895 12809 11585
## <NA> 19311 36165 44551 49456 54761 57923 62556 66790 68187
## byear
## survive1y 1941 1942 1943 1944 1945 1946 1947 1948 1949
## 0 0 0 0 0 0 1 4 4 3
## 1 11739 12449 13055 12974 11955 11066 10013 9076 8061
## <NA> 73866 87560 97828 106179 108817 110600 109889 109842 106781
## byear
## survive1y 1950 1951 1952 1953 1954 1955 1956 1957 1958
## 0 1 2 1 6 3 2 3 5 2
## 1 7394 6539 6050 5656 5185 5067 4755 4360 4011
## <NA> 102753 99036 100723 101876 98548 101314 102850 102763 101746
## byear
## survive1y 1959 1960 1961 1962 1963 1964 1965 1966 1967
## 0 4 158 1399 1372 1385 1431 1324 1263 1268
## 1 3662 3546 3515 110197 116267 126024 126374 126463 125090
## <NA> 102396 100194 103991 0 0 0 0 0 0
## byear
## survive1y 1968 1969 1970 1971 1972 1973 1974 1975 1976
## 0 1219 980 984 1004 929 826 856 700 666
## 1 117406 111587 113380 117419 116096 114157 114944 108898 103430
## <NA> 0 0 0 0 0 0 0 0 0
## byear
## survive1y 1977 1978 1979 1980 1981 1982 1983 1984 1985
## 0 610 578 594 569 538 523 575 568 577
## 1 101411 98826 102194 103602 100940 100500 99680 102345 106941
## <NA> 0 0 0 0 0 0 0 0 0
## byear
## survive1y 1986 1987 1988 1989 1990 1991 1992 1993 1994
## 0 525 572 565 585 649 627 560 483 434
## 1 110470 113027 120897 124457 132234 131360 129139 123121 118274
## <NA> 0 0 0 0 0 0 0 0 0
## byear
## survive1y 1995 1996 1997 1998 1999 2000 2001 2002 2003
## 0 377 337 296 271 265 283 306 275 296
## 1 109307 101234 96507 95279 94383 96533 96688 100907 103633
## <NA> 0 0 0 0 0 0 0 0 0
## byear
## survive1y 2004 2005 2006 2007 2008 2009
## 0 284 253 272 243 109526 256
## 1 105393 105367 109071 109358 23 0
## <NA> 0 0 0 0 0 105725</code></pre>
<!-- R chunk: counts rows that have a non-missing education value although survive1y is 0; these are treated as data errors by the next chunk. -->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># bugs probably due to re-assigned personnummer</span>
<span class="kw">nrow</span>(swed[!<span class="kw">is.na</span>(education) &<span class="st"> </span>survive1y ==<span class="st"> </span><span class="dv">0</span>,])</code></pre></div>
<pre><code>## [1] 37</code></pre>
<!--
  R chunk: for rows that have an education value but are flagged as dead within
  1 or 5 years, blanks age_at_least and the corresponding survival flag, then
  counts rows with education above 2 and surviveR equal to 0.
  Review fix: age_at_least is now blanked BEFORE the flag. In the earlier order
  the flag was already NA when the second statement filtered on "flag == 0",
  so that filter matched no rows and age_at_least was never blanked.
  NOTE(review): outputs further down that depend on age_at_least were produced
  by the earlier ordering and must be regenerated.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># blank age_at_least while the flag still identifies the affected rows</span>
swed[!<span class="kw">is.na</span>(education) &<span class="st"> </span>survive1y ==<span class="st"> </span><span class="dv">0</span>, age_at_least :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
swed[!<span class="kw">is.na</span>(education) &<span class="st"> </span>survive1y ==<span class="st"> </span><span class="dv">0</span>, survive1y :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
swed[!<span class="kw">is.na</span>(education) &<span class="st"> </span>survive5y ==<span class="st"> </span><span class="dv">0</span>, age_at_least :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
swed[!<span class="kw">is.na</span>(education) &<span class="st"> </span>survive5y ==<span class="st"> </span><span class="dv">0</span>, survive5y :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
<span class="kw">nrow</span>(swed[education ><span class="st"> </span><span class="dv">2</span> &<span class="st"> </span>surviveR ==<span class="st"> </span><span class="dv">0</span>,])</code></pre></div>
<pre><code>## [1] 35</code></pre>
<!--
  R chunk: blanks age_at_least and surviveR for rows with education above 2 and
  surviveR equal to 0, derives paternalage and maternalage (child byear minus
  parent byear) and idParents (idMere and idPere joined by an underscore), then
  tests the correlation between maternal and paternal age.
  Review fix: age_at_least is blanked BEFORE surviveR; in the earlier order the
  second filter on "surviveR == 0" matched no rows because surviveR had just
  been set to NA.
  NOTE(review): outputs further down that depend on age_at_least must be
  regenerated.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># blank age_at_least while surviveR still identifies the affected rows</span>
swed[education ><span class="st"> </span><span class="dv">2</span> &<span class="st"> </span>surviveR ==<span class="st"> </span><span class="dv">0</span>, age_at_least :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
swed[education ><span class="st"> </span><span class="dv">2</span> &<span class="st"> </span>surviveR ==<span class="st"> </span><span class="dv">0</span>, surviveR :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
swed[, paternalage :<span class="er">=</span><span class="st"> </span>byear -<span class="st"> </span>byear.Father]
swed[, idParents :<span class="er">=</span><span class="st"> </span><span class="kw">str_c</span>(idMere,<span class="st">"_"</span>,idPere)]
swed[, maternalage :<span class="er">=</span><span class="st"> </span>byear -<span class="st"> </span>byear.Mother]
<span class="co"># qplot(maternalage,paternalage,data=swed,geom="jitter",alpha=I(0.1))</span>
<span class="kw">cor.test</span>(swed$maternalage,swed$paternalage)</code></pre></div>
<pre><code>##
## Pearson's product-moment correlation
##
## data: swed$maternalage and swed$paternalage
## t = 3100, df = 8100000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.7365 0.7372
## sample estimates:
## cor
## 0.7368</code></pre>
<!-- R chunk: adds a constant column born = 1; it is passed as wt_var to count_and_merge and summed per idParents in later chunks. -->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed$born =<span class="st"> </span><span class="dv">1</span></code></pre></div>
</div>
<div id="calculating-reproductive-success" class="section level3">
<h3>Calculating reproductive success</h3>
<!--
  R chunk: sorts swed by idParents and byear, marks the first row of each
  idParents value as first_child_with_this_partner, passes that marker to
  count_and_merge as the weight for 'fertile_unions', and cross-tabulates
  fertile_unions against spouses.
  NOTE(review): count_and_merge is defined outside this section; presumably it
  sums wt_var over each person's children and merges the total back. Confirm
  against its definition.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed =<span class="st"> </span>swed[<span class="kw">order</span>(idParents,byear), ]
swed[ , first_child_with_this_partner :<span class="er">=</span><span class="st"> </span><span class="kw">ifelse</span>( !<span class="kw">duplicated</span>(idParents), <span class="dv">1</span>, <span class="dv">0</span>) ]
swed =<span class="st"> </span><span class="kw">count_and_merge</span>(swed, <span class="st">'fertile_unions'</span>, <span class="dt">wt_var =</span> <span class="st">"first_child_with_this_partner"</span>)
<span class="kw">crosstabs</span>(~<span class="st"> </span>fertile_unions +<span class="st"> </span>spouses, <span class="dt">data =</span> swed)</code></pre></div>
<pre><code>## spouses
## fertile_unions 0 1 2 3 4 5 6
## 0 2209839 273143 51614 5576 355 20 3
## 1 805108 1921584 654070 53067 2582 108 11
## 2 102010 169292 94192 25748 2136 161 8
## 3 10435 15535 10292 4039 583 62 12
## 4 1141 1643 1305 606 125 18 4
## 5 151 241 177 104 28 4 2
## 6 26 38 54 20 4 1 1
## 7 10 4 13 5 2 1 1
## 8 1 2 3 1 0 0 0
## 9 0 1 0 0 0 0 0
## 10 0 0 1 0 0 0 0
## 11 0 1 0 0 0 0 0
## spouses
## fertile_unions 7 8 9 <NA>
## 0 0 0 0 1784037
## 1 2 1 0 584
## 2 1 0 0 12
## 3 1 2 0 0
## 4 1 0 0 1
## 5 1 0 1 0
## 6 0 0 0 0
## 7 0 0 0 0
## 8 0 0 0 0
## 9 0 0 0 0
## 10 0 0 0 0
## 11 0 0 0 0</code></pre>
<!--
  R chunk: adds 'children' via count_and_merge weighted by born, computes
  children.per.spouse as children divided by spouses (set to NA where spouses
  is 0, which avoids division-by-zero results), defines the helper changeNAto1
  that replaces NA with 1 in every column via colwise, and counts rows that
  have children although surviveR is 0.
  NOTE(review): changeNAto1 is not called anywhere in the visible section.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed =<span class="st"> </span><span class="kw">count_and_merge</span>(swed, <span class="st">'children'</span>, <span class="dt">wt_var =</span> <span class="st">"born"</span>)
swed$children.per.spouse =<span class="st"> </span>swed$children/swed$spouses
swed$children.per.spouse[<span class="kw">which</span>(swed$spouses==<span class="dv">0</span>)] =<span class="st"> </span><span class="ot">NA</span>
<span class="co"># qplot(swed$children.per.spouse)</span>
<span class="co"># qplot(swed$children)</span>
<span class="co"># qplot(swed$spouses)</span>
changeNAto1 =<span class="st"> </span>function(x) { <span class="kw">colwise</span>(function(x) { <span class="kw">ifelse</span>(<span class="kw">is.na</span>(x), <span class="dv">1</span>, x)})(x) }
<span class="co"># bugs probably owing to re-assigment of personnummer</span>
<span class="kw">nrow</span>(swed[children><span class="dv">0</span> &<span class="st"> </span>surviveR ==<span class="st"> </span><span class="dv">0</span>,])</code></pre></div>
<pre><code>## [1] 25</code></pre>
<!--
  R chunk: for rows that have children but are flagged as not surviving, blanks
  survive1y, age_at_least and surviveR, then counts rows with spouses above 0
  and surviveR equal to 0.
  Review fix: age_at_least is blanked BEFORE surviveR; in the earlier order the
  filter "surviveR == 0" on the age_at_least statement matched no rows because
  surviveR had just been set to NA.
  NOTE(review): outputs further down that depend on age_at_least must be
  regenerated.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed[children><span class="dv">0</span> &<span class="st"> </span>survive1y ==<span class="st"> </span><span class="dv">0</span>, survive1y :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
<span class="co"># blank age_at_least while surviveR still identifies the affected rows</span>
swed[children><span class="dv">0</span> &<span class="st"> </span>surviveR ==<span class="st"> </span><span class="dv">0</span>, age_at_least :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
swed[children><span class="dv">0</span> &<span class="st"> </span>surviveR ==<span class="st"> </span><span class="dv">0</span>, surviveR :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
<span class="kw">nrow</span>(swed[spouses ><span class="st"> </span><span class="dv">0</span> &<span class="st"> </span>surviveR ==<span class="st"> </span><span class="dv">0</span>,])</code></pre></div>
<pre><code>## [1] 3690</code></pre>
<!--
  R chunk: blanks the survival flags and age_at_least for rows that have
  spouses but are flagged as not surviving, then uses count_and_merge to add
  counts of surviving children and of grandchildren, derives dead1y and
  children.dead1y, sums children.wddate, and cross-tabulates grandchildren,
  children and surviveR.
  Review fix: age_at_least is blanked BEFORE surviveR; in the earlier order the
  filter "surviveR == 0" on the age_at_least statement matched no rows because
  surviveR had just been set to NA.
  NOTE(review): dead1y and children.dead1y depend on age_at_least, so values
  derived from them must be regenerated.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed[spouses ><span class="st"> </span><span class="dv">0</span> &<span class="st"> </span>survive1y ==<span class="st"> </span><span class="dv">0</span>, survive1y :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
swed[spouses ><span class="st"> </span><span class="dv">0</span> &<span class="st"> </span>surviveR ==<span class="st"> </span><span class="dv">0</span>, survive5y :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
<span class="co"># blank age_at_least while surviveR still identifies the affected rows</span>
swed[spouses ><span class="st"> </span><span class="dv">0</span> &<span class="st"> </span>surviveR ==<span class="st"> </span><span class="dv">0</span>, age_at_least :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
swed[spouses ><span class="st"> </span><span class="dv">0</span> &<span class="st"> </span>surviveR ==<span class="st"> </span><span class="dv">0</span>, surviveR :<span class="er">=</span><span class="st"> </span><span class="ot">NA</span>]
swed =<span class="st"> </span><span class="kw">count_and_merge</span>(swed, <span class="st">'children.surviving1y'</span>, <span class="dt">wt_var =</span> <span class="st">'survive1y'</span>)
swed =<span class="st"> </span><span class="kw">count_and_merge</span>(swed, <span class="st">'children.surviving5y'</span>, <span class="dt">wt_var =</span> <span class="st">'survive5y'</span>)
swed =<span class="st"> </span><span class="kw">count_and_merge</span>(swed, <span class="st">'children.survivingR'</span>, <span class="dt">wt_var =</span> <span class="st">'surviveR'</span>)
<span class="co"># swed = count_and_merge(swed, 'children.spouses', wt_var = 'spouses')</span>
<span class="co"># swed = count_and_merge(swed, 'grandchildren.per.spouse', wt_var = 'children.per.spouse')</span>
swed =<span class="st"> </span><span class="kw">count_and_merge</span>(swed, <span class="st">'grandchildren'</span>,<span class="dt">wt_var=</span><span class="st">'children'</span>)
swed =<span class="st"> </span><span class="kw">count_and_merge</span>(swed, <span class="st">'grandchildren.surviving1y'</span>, <span class="dt">wt_var =</span> <span class="st">'children.surviving1y'</span>)
swed =<span class="st"> </span><span class="kw">count_and_merge</span>(swed, <span class="st">'grandchildren.survivingR'</span>, <span class="dt">wt_var =</span> <span class="st">'children.survivingR'</span>)
<span class="co"># NOTE(review): dead1y is 0 for age_at_least >= 1; confirm this boundary is the intended complement of survive1y</span>
swed[, dead1y :<span class="er">=</span><span class="st"> </span><span class="kw">ifelse</span>(age_at_least >=<span class="st"> </span><span class="dv">1</span>, <span class="dv">0</span>, <span class="dv">1</span>)]
swed =<span class="st"> </span><span class="kw">count_and_merge</span>(swed, <span class="st">'children.dead1y'</span>, <span class="dt">wt_var =</span> <span class="st">'dead1y'</span>)
swed[, children.wddate :<span class="er">=</span><span class="st"> </span>children.dead1y +<span class="st"> </span>children.surviving1y]
<span class="kw">xtabs</span>(~<span class="st"> </span>(grandchildren><span class="dv">0</span>) +(children><span class="dv">0</span>) +<span class="st"> </span>surviveR,<span class="dt">data=</span>swed,<span class="dt">exclude=</span><span class="ot">NULL</span>, <span class="dt">na.action=</span> na.pass)</code></pre></div>
<pre><code>## , , surviveR = 0
##
## children > 0
## grandchildren > 0 FALSE TRUE
## FALSE 1594483 0
## TRUE 0 0
##
## , , surviveR = 1
##
## children > 0
## grandchildren > 0 FALSE TRUE
## FALSE 2160586 1563637
## TRUE 0 184535
##
## , , surviveR = NA
##
## children > 0
## grandchildren > 0 FALSE TRUE
## FALSE 568287 828315
## TRUE 0 1302125</code></pre>
<!-- R chunk: contains only commented-out qplot calls (children, grandchildren, spouses); nothing is executed. -->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># qplot(swed$children)</span>
<span class="co"># qplot(swed[which(swed$age > 15),]$children )</span>
<span class="co"># qplot(swed$grandchildren)</span>
<span class="co"># qplot(swed[which(swed$age > 15),]$grandchildren )</span>
<span class="co"># qplot(swed$spouses)</span></code></pre></div>
<p>Pre-calculate some predictors.</p>
<!--
  R chunk: sorts swed by idParents and byear, numbers the rows within each
  idParents value (birthorder, via seq_along), and computes min15.birthorder as
  the running sum of surviveR within each idPere with NA counted as 0. It then
  tabulates missingness of both columns.
  NOTE(review): birthorder is grouped by idParents but min15.birthorder by
  idPere, while rows are sorted by idParents and byear; for fathers with more
  than one partner the running sum therefore follows that sort order rather
  than byear alone. Confirm this is intended.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed =<span class="st"> </span>swed[<span class="kw">order</span>(idParents,byear), ]
swed <-<span class="st"> </span><span class="kw">transform</span>(swed, <span class="dt">birthorder =</span> <span class="kw">ave</span>(<span class="kw">rep</span>(<span class="ot">NA</span>, <span class="kw">nrow</span>(swed)), swed$idParents, <span class="dt">FUN =</span> seq_along)) <span class="co"># old trick to get birth order, don't know what this does to those with missings for father though</span>
<span class="co"># qplot(swed$birthorder,binwidth=1)</span>
swed <-<span class="st"> </span><span class="kw">transform</span>(swed, <span class="dt">min15.birthorder =</span> <span class="kw">ave</span>(surviveR, idPere, <span class="dt">FUN =</span>function(x) { x[<span class="kw">is.na</span>(x)] =<span class="st"> </span><span class="dv">0</span>
<span class="kw">cumsum</span>(x)
} ))
<span class="kw">xtabs</span>(<span class="dt">data=</span>swed, ~<span class="kw">is.na</span>(birthorder) +<span class="st"> </span><span class="kw">is.na</span>(min15.birthorder))</code></pre></div>
<pre><code>## is.na(min15.birthorder)
## is.na(birthorder) FALSE
## FALSE 8141033
## TRUE 60935</code></pre>
<!-- R chunk: frequency table of min15.birthorder, with exclude=NULL so that NA gets its own column. -->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">table</span>(swed$min15.birthorder,<span class="dt">exclude=</span><span class="ot">NULL</span>)</code></pre></div>
<pre><code>##
## 0 1 2 3 4 5 6 7 8
## 3709277 2463803 1370459 474328 128488 36357 11935 4291 1748
## 9 10 11 12 13 14 15 16 17
## 692 319 145 59 33 18 10 4 1
## 18 <NA>
## 1 0</code></pre>
<!-- R chunk: nr.siblings is the sum of born within each idParents value minus 1, i.e. the group size excluding the row itself (born is set to 1 for every row in an earlier chunk). -->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed[, nr.siblings :<span class="er">=</span><span class="st"> </span><span class="kw">ave</span>(born,idParents,<span class="dt">FUN=</span> function(x) { <span class="kw">sum</span>(x,<span class="dt">na.rm=</span>T) } ) -<span class="st"> </span><span class="dv">1</span>]
<span class="co"># qplot(swed$nr.siblings,binwidth=1)</span></code></pre></div>
<p>Count dependent siblings.</p>
<!--
  R chunk: groups swed by idParents, adds dependent_sibs_f5y by calling the
  helper of the same name with survive1y, byear and dyear, and converts the
  result back to a data.table.
  NOTE(review): the helper is defined outside this section; its exact
  definition of a dependent sibling is not visible here.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed =<span class="st"> </span>swed %>%
<span class="st"> </span><span class="kw">group_by</span>(idParents) %>%
<span class="st"> </span><span class="kw">mutate</span>(
<span class="dt">dependent_sibs_f5y =</span> <span class="kw">dependent_sibs_f5y</span>(<span class="dt">survive1y=</span>survive1y, <span class="dt">byear=</span>byear, <span class="dt">dyear=</span>dyear)
) %>%<span class="st"> </span><span class="kw">data.table</span>()</code></pre></div>
<!-- R chunk: assigns birth.cohort from byear via year_bins and tabulates it; the output below shows five-year bin labels. -->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed[, birth.cohort :<span class="er">=</span><span class="st"> </span><span class="kw">year_bins</span>(byear)]
<span class="kw">crosstabs</span>(swed$birth.cohort)</code></pre></div>
<pre><code>## swed$birth.cohort
## 1930-1935 1935-1940 1940-1945 1945-1950 1950-1955 1955-1960 1960-1965
## 135511 358970 495422 596112 533773 532940 569479
## 1965-1970 1970-1975 1975-1980 1980-1985 1985-1990 1990-1995 1995-2000
## 612974 580595 517907 509840 578616 636881 498256
## 2000-2005 2005-2010
## 504598 540094</code></pre>
<!--
  R chunk: derives maternal_loss / paternal_loss, the binned age of the child
  (parent dyear minus child byear) at the parent's death.
  Steps per parent: compute the age; map values in [-1, 0) to 0; bin with cut()
  into [0,1], (1,5], ... (40,45]; label ages above 45 as "later"; label missing
  or unbinnable ages as "unclear"; convert to a factor with "later" as the
  first level.
  Review fix: the "later" test now uses "> 45" instead of ">= 45". With ">= 45"
  an age of exactly 45 was moved to "later" although the right-closed bin
  label "(40,45]" states that 45 belongs to it.
  NOTE(review): the two tables printed below were produced with ">= 45" and
  must be regenerated.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed %>%<span class="st"> </span>
<span class="st"> </span><span class="kw">tbl_df</span>() %>%<span class="st"> </span>
<span class="kw">mutate</span>(
<span class="dt">maternal_loss_age =</span> dyear.Mother -<span class="st"> </span>byear
,<span class="dt">maternal_loss_age =</span> <span class="kw">as.numeric</span>(<span class="kw">ifelse</span>(maternal_loss_age >=<span class="st"> </span>-<span class="dv">1</span> &<span class="st"> </span>maternal_loss_age <<span class="st"> </span><span class="dv">0</span>, <span class="dv">0</span>, maternal_loss_age))
,<span class="dt">maternal_loss =</span> <span class="kw">as.character</span>(<span class="kw">cut</span>(maternal_loss_age, <span class="dt">breaks =</span> <span class="kw">c</span>(<span class="dv">0</span>,<span class="dv">1</span>,<span class="dv">5</span>,<span class="dv">10</span>,<span class="dv">15</span>,<span class="dv">20</span>,<span class="dv">25</span>,<span class="dv">30</span>,<span class="dv">35</span>,<span class="dv">40</span>,<span class="dv">45</span>), <span class="dt">include.lowest =</span> T ))
<span class="co"># strictly above 45, so that exactly 45 stays in the right-closed bin (40,45]</span>
,<span class="dt">maternal_loss =</span> <span class="kw">ifelse</span>( maternal_loss_age ><span class="st"> </span><span class="dv">45</span>, <span class="st">"later"</span>, maternal_loss)
,<span class="dt">maternal_loss =</span> <span class="kw">ifelse</span>(<span class="kw">is.na</span>(maternal_loss_age) |<span class="st"> </span><span class="kw">is.na</span>(maternal_loss), <span class="st">"unclear"</span>, maternal_loss)
,<span class="dt">maternal_loss =</span> <span class="kw">factor</span>(maternal_loss, <span class="dt">levels =</span> <span class="kw">c</span>(<span class="st">"later"</span>,<span class="st">"[0,1]"</span>, <span class="st">"(1,5]"</span>, <span class="st">"(5,10]"</span>, <span class="st">"(10,15]"</span>, <span class="st">"(15,20]"</span>, <span class="st">"(20,25]"</span>, <span class="st">"(25,30]"</span>, <span class="st">"(30,35]"</span>, <span class="st">"(35,40]"</span>, <span class="st">"(40,45]"</span>, <span class="st">"unclear"</span>))
,<span class="dt">paternal_loss_age =</span> dyear.Father -<span class="st"> </span>byear
,<span class="dt">paternal_loss_age =</span> <span class="kw">as.numeric</span>(<span class="kw">ifelse</span>(paternal_loss_age >=<span class="st"> </span>-<span class="dv">1</span> &<span class="st"> </span>paternal_loss_age <<span class="st"> </span><span class="dv">0</span>, <span class="dv">0</span>, paternal_loss_age))
,<span class="dt">paternal_loss =</span> <span class="kw">as.character</span>(<span class="kw">cut</span>(paternal_loss_age, <span class="dt">breaks =</span> <span class="kw">c</span>(<span class="dv">0</span>,<span class="dv">1</span>,<span class="dv">5</span>,<span class="dv">10</span>,<span class="dv">15</span>,<span class="dv">20</span>,<span class="dv">25</span>,<span class="dv">30</span>,<span class="dv">35</span>,<span class="dv">40</span>,<span class="dv">45</span>), <span class="dt">include.lowest =</span> T ))
<span class="co"># strictly above 45, so that exactly 45 stays in the right-closed bin (40,45]</span>
,<span class="dt">paternal_loss =</span> <span class="kw">ifelse</span>( paternal_loss_age ><span class="st"> </span><span class="dv">45</span>, <span class="st">"later"</span>, paternal_loss)
,<span class="dt">paternal_loss =</span> <span class="kw">ifelse</span>(<span class="kw">is.na</span>(paternal_loss_age) |<span class="st"> </span><span class="kw">is.na</span>(paternal_loss), <span class="st">"unclear"</span>, paternal_loss)
,<span class="dt">paternal_loss =</span> <span class="kw">factor</span>(paternal_loss, <span class="dt">levels =</span> <span class="kw">c</span>(<span class="st">"later"</span>,<span class="st">"[0,1]"</span>, <span class="st">"(1,5]"</span>, <span class="st">"(5,10]"</span>, <span class="st">"(10,15]"</span>, <span class="st">"(15,20]"</span>, <span class="st">"(20,25]"</span>, <span class="st">"(25,30]"</span>, <span class="st">"(30,35]"</span>, <span class="st">"(35,40]"</span>, <span class="st">"(40,45]"</span>, <span class="st">"unclear"</span>))
) %>%
<span class="st"> </span><span class="kw">data.table</span>() ->
<span class="st"> </span>swed
<span class="kw">crosstabs</span>(swed$maternal_loss)</code></pre></div>
<pre><code>## swed$maternal_loss
## later [0,1] (1,5] (5,10] (10,15] (15,20] (20,25] (25,30] (30,35]
## 1274984 2151 7869 16189 27191 45547 71480 106952 149433
## (35,40] (40,45] unclear
## 204279 208744 6087149</code></pre>
<!-- R chunk: tabulates the paternal_loss factor built in the previous chunk. -->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">crosstabs</span>(swed$paternal_loss)</code></pre></div>
<pre><code>## swed$paternal_loss
## later [0,1] (1,5] (5,10] (10,15] (15,20] (20,25] (25,30] (30,35]
## 1096763 7248 19961 38395 63816 108260 170261 251155 328328
## (35,40] (40,45] unclear
## 394178 339366 5384237</code></pre>
<!--
  R chunk: derives older_siblings (birthorder minus 1, capped at "5+"),
  last_born, siblings (size of the idParents group minus 1) and recomputes
  birthorder after re-sorting by idParents and byear; then plots birthorder.
  Review fix: last_born compared birthorder with nr.siblings. birthorder runs
  from 1 to n within a sibship, whereas nr.siblings is n minus 1 (it excludes
  the child itself), so the old test flagged the second-to-last child and never
  flagged an only child. The last child is the one whose birthorder equals
  nr.siblings + 1.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed$older_siblings =<span class="st"> </span><span class="kw">factor</span>(<span class="kw">ifelse</span>((swed$birthorder -<span class="st"> </span><span class="dv">1</span>) ><span class="st"> </span><span class="dv">4</span>,<span class="st">"5+"</span>, swed$birthorder -<span class="st"> </span><span class="dv">1</span>))
<span class="co"># nr.siblings excludes the child itself, so the last-born has birthorder == nr.siblings + 1</span>
swed$last_born =<span class="st"> </span><span class="kw">ifelse</span>(swed$birthorder ==<span class="st"> </span>swed$nr.siblings +<span class="st"> </span><span class="dv">1</span>, <span class="dv">1</span>, <span class="dv">0</span>)
swed =<span class="st"> </span>swed[<span class="kw">order</span>(swed$idParents,swed$byear), ]
swed <-<span class="st"> </span><span class="kw">transform</span>(swed, <span class="dt">siblings =</span> <span class="kw">ave</span>(<span class="kw">rep</span>(<span class="ot">NA</span>, <span class="kw">nrow</span>(swed)), swed$idParents, <span class="dt">FUN =</span> length)-<span class="dv">1</span>) <span class="co"># sibling count</span>
swed <-<span class="st"> </span><span class="kw">transform</span>(swed, <span class="dt">birthorder =</span> <span class="kw">ave</span>(<span class="kw">rep</span>(<span class="ot">NA</span>, <span class="kw">nrow</span>(swed)), swed$idParents, <span class="dt">FUN =</span> seq_along)) <span class="co"># old trick to get birth order, don't know what this does to those with missings for father though</span>
<span class="kw">qplot</span>(swed$birthorder)</code></pre></div>
<pre><code>## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.</code></pre>
<pre><code>## Warning: Removed 60935 rows containing non-finite values (stat_bin).</code></pre>
<p><img src="figure/unnamed-chunk-4-1.png" title="plot of chunk unnamed-chunk-4" alt="Histogram of birth order (swed$birthorder)" width="1440" height="900" /></p>
<!-- R chunk: younger_siblings is siblings + 1 (the sibship size) minus the child's own birthorder. -->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed$younger_siblings =<span class="st"> </span>swed$siblings +<span class="st"> </span><span class="dv">1</span> -<span class="st"> </span>swed$birthorder</code></pre></div>
<!--
  R chunk: groups swed by idParents and adds younger_sibs_ad_5y,
  older_sibs_ad_5y and dependent_sibs_f5y by calling helpers with survive5y or
  survive1y plus byear and dyear, then converts back to a data.table.
  NOTE(review): dependent_sibs_f5y is also computed with identical arguments in
  the "count dependent sibs" chunk earlier in this section; this call looks
  redundant unless inputs changed in between. Confirm before removing either.
  NOTE(review): the three helpers are defined outside this section.
-->
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed =<span class="st"> </span>swed %>%
<span class="st"> </span><span class="kw">group_by</span>(idParents) %>%
<span class="st"> </span><span class="kw">mutate</span>(
<span class="dt">younger_sibs_ad_5y =</span> <span class="kw">younger_sibs_alive_and_dependent</span>(<span class="dt">survive5y=</span>survive5y, <span class="dt">byear=</span>byear, <span class="dt">dyear=</span>dyear) ,
<span class="dt">older_sibs_ad_5y =</span> <span class="kw">older_sibs_alive_and_dependent</span>(<span class="dt">survive5y=</span>survive5y, <span class="dt">byear=</span>byear, <span class="dt">dyear=</span>dyear),
<span class="dt">dependent_sibs_f5y =</span> <span class="kw">dependent_sibs_f5y</span>(<span class="dt">survive1y=</span>survive1y, <span class="dt">byear=</span>byear, <span class="dt">dyear=</span>dyear)
) %>%<span class="st"> </span><span class="kw">data.table</span>()</code></pre></div>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">swed[, paternalage :<span class="er">=</span><span class="st"> </span>paternalage/<span class="dv">10</span>]
swed[, maternalage :<span class="er">=</span><span class="st"> </span>maternalage/<span class="dv">10</span>]
swed =<span class="st"> </span><span class="kw">recenter.pat</span>(<span class="kw">recenter.pat</span>(swed), <span class="dt">what =</span> <span class="st">"maternalage"</span>)
min_na =<span class="st"> </span>function(x) { <span class="kw">ifelse</span>(<span class="kw">all</span>(<span class="kw">is.na</span>(x)), <span class="ot">NA</span>, <span class="kw">min</span>(x,<span class="dt">na.rm=</span>T) ) }
max_na =<span class="st"> </span>function(x) { <span class="kw">ifelse</span>(<span class="kw">all</span>(<span class="kw">is.na</span>(x)), <span class="ot">NA</span>, <span class="kw">max</span>(x,<span class="dt">na.rm=</span>T) ) }
swed =<span class="st"> </span>swed[<span class="kw">order</span>(idPere),]
swed[, paternalage_at_1st_sib :<span class="er">=</span><span class="st"> </span><span class="kw">ave</span>(paternalage, idPere, <span class="dt">FUN =</span> min_na)]
swed[, paternalage_at_last_sib :<span class="er">=</span><span class="st"> </span><span class="kw">ave</span>(paternalage, idPere, <span class="dt">FUN =</span> max_na)]
swed =<span class="st"> </span>swed[<span class="kw">order</span>(idMere),]
swed[, maternalage_at_1st_sib :<span class="er">=</span><span class="st"> </span><span class="kw">ave</span>(maternalage, idMere, <span class="dt">FUN =</span> min_na)]
swed[, maternalage_at_last_sib :<span class="er">=</span><span class="st"> </span><span class="kw">ave</span>(maternalage, idMere, <span class="dt">FUN =</span> max_na)]
fathers =<span class="st"> </span>swed[!<span class="kw">duplicated</span>(idPere), <span class="kw">list</span>(idPere, paternalage_at_1st_sib, paternalage_at_last_sib)]
<span class="kw">names</span>(fathers) =<span class="st"> </span><span class="kw">c</span>(<span class="st">"idIndividu"</span>,<span class="st">"age_at_1st_child"</span>, <span class="st">"age_at_last_child"</span>)
mothers =<span class="st"> </span>swed[!<span class="kw">duplicated</span>(idMere), <span class="kw">list</span>(idMere, maternalage_at_1st_sib, maternalage_at_last_sib)]
<span class="kw">names</span>(mothers) =<span class="st"> </span><span class="kw">c</span>(<span class="st">"idIndividu"</span>,<span class="st">"age_at_1st_child"</span>, <span class="st">"age_at_last_child"</span>)
parents =<span class="st"> </span><span class="kw">rbind</span>(fathers, mothers)
swed =<span class="st"> </span><span class="kw">merge</span>(swed, parents, <span class="dt">by =</span> <span class="st">"idIndividu"</span>, <span class="dt">all.x =</span> T)
<span class="kw">rm</span>(parents,fathers,mothers)