-
Notifications
You must be signed in to change notification settings - Fork 3
/
bibliography.bib
313 lines (288 loc) · 33.7 KB
/
bibliography.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
% note: make everything here an article to comply with table formatting
% Dapello, Kar, Schrimpf et al. -- bioRxiv preprint, 2022.
% The abstract no longer carries the "Competing Interest Statement" boilerplate
% that the bioRxiv export had fused onto its last sentence.
@article{DapelloKar2022,
  author       = {Dapello, Joel and Kar, Kohitij and Schrimpf, Martin and Geary, Robert and Ferguson, Michael and Cox, David D. and DiCarlo, James J.},
  author+an    = {3=highlight},
  title        = {Aligning Model and Macaque Inferior Temporal Cortex Representations Improves Model-to-Human Behavioral Alignment and Adversarial Robustness},
  journal      = {bioRxiv},
  year         = {2022},
  doi          = {10.1101/2022.07.01.498495},
  url          = {https://www.biorxiv.org/content/early/2022/07/04/2022.07.01.498495},
  elocation-id = {2022.07.01.498495},
  publisher    = {Cold Spring Harbor Laboratory},
  opteprint    = {https://www.biorxiv.org/content/early/2022/07/04/2022.07.01.498495.full.pdf},
  abstract     = {While some state-of-the-art artificial neural network systems in computer vision are strikingly accurate models of the corresponding primate visual processing, there are still many discrepancies between these models and the behavior of primates on object recognition tasks. Many current models suffer from extreme sensitivity to adversarial attacks and often do not align well with the image-by-image behavioral error patterns observed in humans. Previous research has provided strong evidence that primate object recognition behavior can be very accurately predicted by neural population activity in the inferior temporal (IT) cortex, a brain area in the late stages of the visual processing hierarchy. Therefore, here we directly test whether making the late stage representations of models more similar to that of macaque IT produces new models that exhibit more robust, primate-like behavior. We conducted chronic, large-scale multi-electrode recordings across the IT cortex in six non-human primates (rhesus macaques). We then use these data to fine-tune (end-to-end) the model {\textquotedblleft}IT{\textquotedblright} representations such that they are more aligned with the biological IT representations, while preserving accuracy on object recognition tasks. We generate a cohort of models with a range of IT similarity scores validated on held-out animals across two image sets with distinct statistics. Across a battery of optimization conditions, we observed a strong correlation between the models{\textquoteright} IT-likeness and alignment with human behavior, as well as an increase in its adversarial robustness. We further assessed the limitations of this approach and find that the improvements in behavioral alignment and adversarial robustness generalize across different image statistics, but not to object categories outside of those covered in our IT training set.
Taken together, our results demonstrate that building models that are more aligned with the primate brain leads to more robust and human-like behavior, and call for larger neural data-sets to further augment these gains.},
}
% Schrimpf, Mc Grath, DiCarlo -- Champalimaud Research Symposium (CRS21), 2021.
@article{Schrimpf2021Topographic,
  author    = {Schrimpf, Martin and Mc Grath, Paul and DiCarlo, James},
  author+an = {1=highlight},
  title     = {Topographic ANNs Predict the Behavioral Effects of Causal Perturbations in Primate Visual Ventral Stream IT},
  journal   = {Champalimaud Research Symposium (CRS21)},
  year      = {2021},
}
% Kar, Schrimpf, DiCarlo -- Vision Sciences Society Annual Meeting, 2021.
@article{Kar2021,
  author    = {Kar, Kohitij and Schrimpf, Martin and DiCarlo, James},
  author+an = {2=highlight},
  title     = {Chemogenetic suppression of macaque V4 neurons produces retinotopically specific deficits in downstream IT neural activity patterns and core object recognition behavior},
  journal   = {Vision Sciences Society Annual Meeting},
  year      = {2021},
}
% Marques, Schrimpf, DiCarlo -- bioRxiv, 2021.
@article{Marques2021,
  author    = {Marques, Tiago and Schrimpf, Martin and DiCarlo, James},
  author+an = {2=highlight},
  title     = {Multi-scale hierarchical neural network models that bridge from single neurons in the primary visual cortex to primate object recognition behavior},
  journal   = {bioRxiv},
  year      = {2021},
}
% Gan et al. -- NeurIPS 2021. The journal field deliberately starts with
% presentation markup (a line break followed by a bold "Oral" label).
@article{gan2021threedworld,
  author    = {Gan, Chuang and Schwartz, Jeremy and Alter, Seth and Schrimpf, Martin and Traer, James and De Freitas, Julian and Kubilius, Jonas and Bhandwaldar, Abhishek and Haber, Nick and Sano, Megumi and others},
  author+an = {4=highlight},
  title     = {ThreeDWorld: A platform for interactive multi-modal physical simulation},
  journal   = {\\\textbf{Oral}, Neural Information Processing Systems (NeurIPS)},
  year      = {2021},
}
% Casper et al. -- AAAI, 2021 (arXiv 1912.04783).
% month uses the predefined dec macro (unbraced) instead of the literal string
% "dec"; a dangling numeric citation marker was dropped from the abstract.
@article{Casper2021frivolous,
  author        = {Casper, Stephen and Boix, Xavier and D'Amario, Vanessa and Guo, Ling and Schrimpf, Martin and Vinken, Kasper and Kreiman, Gabriel},
  author+an     = {5=highlight},
  title         = {{Frivolous Units: Wider Networks are not really that Wide}},
  journal       = {AAAI},
  year          = {2021},
  month         = dec,
  archivePrefix = {arXiv},
  arxivId       = {1912.04783},
  opteprint     = {1912.04783},
  url           = {http://arxiv.org/abs/1912.04783},
  file          = {:C$\backslash$:/Users/Martin/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Casper et al. - 2019 - Removable andor Repeated Units Emerge in Overparametrized Deep Neural Networks.pdf:pdf},
  abstract      = {Deep neural networks (DNNs) perform well on a variety of tasks despite the fact that most networks used in practice are vastly overparametrized and even capable of perfectly fitting randomly labeled data. Recent evidence suggests that developing compressible representations is key for adjusting the complexity of overparametrized networks to the task at hand. In this paper, we provide new empirical evidence that supports this hypothesis by identifying two types of units that emerge when the network's width is increased: removable units which can be dropped out of the network without significant change to the output and repeated units whose activities are highly correlated with other units. The emergence of these units implies capacity constraints as the function the network represents could be expressed by a smaller network without these units. In a series of experiments with AlexNet, ResNet and Inception networks in the CIFAR-10 and ImageNet datasets, and also using shallow networks with synthetic data, we show that DNNs consistently increase either the number of removable units, repeated units, or both at greater widths for a comprehensive set of hyperparameters. These results suggest that the mechanisms by which networks in the deep learning regime adjust their complexity operate at the unit level and highlight the need for additional research into what drives the emergence of such units.},
}
% Schrimpf et al. -- Society for the Neurobiology of Language, 2020.
% The journal field deliberately embeds presentation markup (line breaks and
% bold labels for the talk type and the award).
@article{Schrimpf2020snl,
  author    = {Schrimpf, Martin and Blank, Idan and Tuckute, Greta and Kauf, Carina and Hosseini, Eghbal and Kanwisher, Nancy and Tenenbaum, Joshua and Fedorenko, Evelina},
  author+an = {1=highlight},
  title     = {Predictive Neural Language Models Capture Language Processing in the Brain},
  journal   = {\\\textbf{Oral}, Society for the Neurobiology of Language\\\textbf{Merit Award Honorable Mention}},
  year      = {2020},
}
% Schrimpf et al. -- PNAS, 2021.
@article{Schrimpf2021language,
  author    = {Schrimpf, Martin and Blank, Idan and Tuckute, Greta and Kauf, Carina and Hosseini, Eghbal and Kanwisher, Nancy and Tenenbaum, Joshua and Fedorenko, Evelina},
  author+an = {1=highlight},
  title     = {The neural architecture of language: Integrative modeling converges on predictive processing},
  journal   = {Proceedings of the National Academy of Sciences (PNAS)},
  year      = {2021},
  url       = {https://www.pnas.org/content/118/45/e2105646118},
}
% Zhuang et al. -- listed under PNAS, 2020; doi and url point at the bioRxiv
% version.
% Fixes: month uses the jun macro (unbraced); url holds exactly one address
% (it previously held two separated by a space); the trailing "Competing
% Interest Statement" export boilerplate was removed from the abstract.
@article{Zhuang2020,
  author    = {Zhuang, Chengxu and Yan, Siming and Nayebi, Aran and Schrimpf, Martin and Frank, Michael C. and DiCarlo, James J. and Yamins, Daniel L. K.},
  author+an = {4=highlight},
  title     = {{Unsupervised Neural Network Models of the Ventral Visual Stream}},
  journal   = {Proceedings of the National Academy of Sciences (PNAS)},
  year      = {2020},
  month     = jun,
  doi       = {10.1101/2020.06.16.155556},
  url       = {https://www.biorxiv.org/content/10.1101/2020.06.16.155556v1},
  publisher = {Cold Spring Harbor Laboratory},
  file      = {:C$\backslash$:/Users/Martin/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Zhuang et al. - 2020 - Unsupervised Neural Network Models of the Ventral Visual Stream.pdf:pdf},
  abstract  = {Deep neural networks currently provide the best quantitative models of the response patterns of neurons throughout the primate ventral visual stream. However, such networks have remained implausible as a model of the development of the ventral stream, in part because they are trained with supervised methods requiring many more labels than are accessible to infants during development. Here, we report that recent rapid progress in unsupervised learning has largely closed this gap. We find that neural network models learned with deep unsupervised contrastive embedding methods achieve neural prediction accuracy in multiple ventral visual cortical areas that equals or exceeds that of models derived using today's best supervised methods, and that the mapping of these neural network models' hidden layers is neuroanatomically consistent across the ventral stream. Moreover, we find that these methods produce brain-like representations even when trained on noisy and limited data measured from real children's developmental experience. We also find that semi-supervised deep contrastive embeddings can leverage small numbers of labelled examples to produce representations with substantially improved error-pattern consistency to human behavior. Taken together, these results suggest that deep contrastive embedding objectives may be a biologically-plausible computational theory of primate visual development.},
}
% Dapello*, Marques* et al. -- NeurIPS 2020. Asterisks in the surnames and the
% bold "Spotlight" prefix in the journal field are presentation markup.
% optacceptance is an opt-prefixed field name, which standard styles do not
% recognise.
@article{DapelloMarques2020,
  author        = {Dapello*, Joel and Marques*, Tiago and Schrimpf, Martin and Geiger, Franziska and DiCarlo, James J.},
  author+an     = {3=highlight},
  title         = {Simulating a Primary Visual Cortex at the Front of CNNs Improves Robustness to Image Perturbations},
  journal       = {\\\textbf{Spotlight}, Neural Information Processing Systems (NeurIPS)},
  year          = {2020},
  optacceptance = {3\% acceptance rate for spotlights, 280/9454},
}
% Geiger*, Schrimpf* et al. -- ICLR 2022. Asterisks in the surnames and the
% bold "Spotlight" prefix in the journal field are presentation markup.
@article{GeigerSchrimpf2022Wiring,
  author        = {Geiger*, Franziska and Schrimpf*, Martin and Marques, Tiago and DiCarlo, James J.},
  author+an     = {2=highlight},
  title         = {Wiring Up Vision: Minimizing Supervised Synaptic Updates Needed to Produce a Primate Ventral Stream},
  journal       = {\\\textbf{Spotlight}, International Conference on Learning Representations (ICLR)},
  year          = {2022},
  optacceptance = {5\% acceptance rate for spotlights, 176/3391},
}
% Schrimpf et al. -- Neuron, 2020.
@article{Schrimpf2020Neuron,
  author    = {Schrimpf, Martin and Kubilius, Jonas and Lee, Michael and Murty, N. Apurva Ratan and Ajemian, Robert and DiCarlo, James J.},
  author+an = {1=highlight},
  title     = {Integrative Benchmarking to Advance Neurally-Mechanistic Models of Human Intelligence},
  journal   = {Neuron},
  year      = {2020},
}
% Schrimpf*, Kubilius* et al. -- neuromatch, 2020. The bold "Oral" prefix in the
% journal field is presentation markup.
@article{schrimpf2020neuromatch,
  author        = {Schrimpf*, Martin and Kubilius*, Jonas and Nayebi, Aran and Bear, Daniel and Yamins, Daniel L. K. and DiCarlo, James J.},
  author+an     = {1=highlight},
  title         = {Brain-Like Object Recognition with High-Performing Shallow Recurrent ANNs},
  journal       = {\\\textbf{Oral}, neuromatch},
  year          = {2020},
  optacceptance = {9\% acceptance rate for contributed talks, 10/107},
  abstract      = {Deep artificial neural networks (ANNs) are the leading class of candidate models of the primate ventral stream. While initially inspired by brain anatomy, over the past years, these ANNs have evolved from a simple eight-layer architecture to extremely deep architectures, bringing into question how brain-like they still are and making it hard to map onto the brain's anatomy. Here we demonstrate that better anatomical alignment to the brain and high performance on machine learning as well as neuroscience measures do not have to be in contradiction.
We developed CORnet-S, a shallow ANN with four anatomically mapped areas and recurrent connectivity, guided by Brain-Score, a new large-scale composite of neural and behavioral benchmarks for quantifying the functional fidelity of models of the primate ventral visual stream. Despite being significantly shallower than most models, CORnet-S outperforms similarly compact models on Brain-Score and ImageNet. Moreover, ablation studies reveal that recurrence is the main predictive factor of both Brain-Score and ImageNet top-1 performance. Finally, we report that the temporal evolution of the CORnet-S "IT" neural population resembles the actual monkey IT population dynamics.
Taken together, these results establish CORnet-S, a compact, recurrent ANN, as the current best model of the primate ventral visual stream.},
}
% Marques, Schrimpf, DiCarlo -- Cosyne, 2020.
@article{marques2020cosyne,
  author    = {Marques, Tiago and Schrimpf, Martin and DiCarlo, James J.},
  author+an = {2=highlight},
  title     = {Hierarchical neural network models that more closely match primary visual cortex also better explain high-level vision},
  journal   = {Computational and Systems Neuroscience (Cosyne)},
  year      = {2020},
}
% Casper et al. -- arXiv 1912.04783, listed under 2021.
% month uses the predefined dec macro (unbraced) instead of the literal string
% "dec", so styles and Biber can interpret it as a month.
@article{Casper2021removable,
  author        = {Casper, Stephen and Boix, Xavier and D'Amario, Vanessa and Guo, Ling and Schrimpf, Martin and Vinken, Kasper and Kreiman, Gabriel},
  author+an     = {5=highlight},
  title         = {{Removable and/or Repeated Units Emerge in Overparametrized Deep Neural Networks}},
  journal       = {arXiv},
  year          = {2021},
  month         = dec,
  archivePrefix = {arXiv},
  optarxivId    = {1912.04783},
  opteprint     = {1912.04783},
  url           = {http://arxiv.org/abs/1912.04783},
  file          = {:C$\backslash$:/Users/Martin/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Casper et al. - 2019 - Removable andor Repeated Units Emerge in Overparametrized Deep Neural Networks.pdf:pdf},
  abstract      = {Deep neural networks (DNNs) perform well on a variety of tasks despite the fact that most networks used in practice are vastly overparametrized and even capable of perfectly fitting randomly labeled data. Recent evidence suggests that developing compressible representations is key for adjusting the complexity of overparametrized networks to the task at hand. In this paper, we provide new empirical evidence that supports this hypothesis by identifying two types of units that emerge when the network's width is increased: removable units which can be dropped out of the network without significant change to the output and repeated units whose activities are highly correlated with other units. The emergence of these units implies capacity constraints as the function the network represents could be expressed by a smaller network without these units. In a series of experiments with AlexNet, ResNet and Inception networks in the CIFAR-10 and ImageNet datasets, and also using shallow networks with synthetic data, we show that DNNs consistently increase either the number of removable units, repeated units, or both at greater widths for a comprehensive set of hyperparameters. These results suggest that the mechanisms by which networks in the deep learning regime adjust their complexity operate at the unit level and highlight the need for additional research into what drives the emergence of such units.},
}
% Kubilius*, Schrimpf* et al. -- NeurIPS 2019. Asterisks in the surnames and
% the bold "Oral" prefix in the journal field are presentation markup.
@article{KubiliusSchrimpf2019,
  author        = {Kubilius*, Jonas and Schrimpf*, Martin and Nayebi, Aran and Bear, Daniel and Yamins, Daniel L. K. and DiCarlo, James J.},
  author+an     = {2=highlight},
  title         = {Brain-Like Object Recognition with High-Performing Shallow Recurrent ANNs},
  journal       = {\\\textbf{Oral}, Neural Information Processing Systems (NeurIPS)},
  year          = {2019},
  optacceptance = {0.5\% acceptance rate, 36/6743},
  abstract      = {Deep convolutional artificial neural networks (ANNs) are the leading class of candidate models of the mechanisms of visual processing in the primate ventral stream. While initially inspired by brain anatomy, over the past years, these ANNs have evolved from a simple eight-layer architecture in AlexNet to extremely deep and branching architectures, demonstrating increasingly better object categorization performance, yet bringing into question how brain-like they still are. In particular, typical deep models from the machine learning community are often hard to map onto the brain's anatomy due to their vast number of layers and missing biologically-important connections, such as recurrence. Here we demonstrate that better anatomical alignment to the brain and high performance on machine learning as well as neuroscience measures do not have to be in contradiction. We developed CORnet-S, a shallow ANN with four anatomically mapped areas and recurrent connectivity, guided by Brain-Score, a new large-scale composite of neural and behavioral benchmarks for quantifying the functional fidelity of models of the primate ventral visual stream. Despite being significantly shallower than most models, CORnet-S is the top model on Brain-Score and outperforms similarly compact models on ImageNet. Moreover, our extensive analyses of CORnet-S circuitry variants reveal that recurrence is the main predictive factor of both Brain-Score and ImageNet top-1 performance. Finally, we report that the temporal evolution of the CORnet-S "IT" neural population resembles the actual monkey IT population dynamics. Taken together, these results establish CORnet-S, a compact, recurrent ANN, as the current best model of the primate ventral visual stream.},
}
% Jozwik, Schrimpf, Kanwisher, DiCarlo -- bioRxiv, 2019.
% Fixes: month uses the jul macro (unbraced); url holds exactly one address
% (it previously held two separated by a space, which is not a valid link).
@article{Jozwik2019primatehuman,
  author    = {Jozwik, Kamila Maria and Schrimpf, Martin and Kanwisher, Nancy and DiCarlo, James J.},
  author+an = {2=highlight},
  title     = {{To find better neural network models of human vision, find better neural network models of primate vision}},
  journal   = {bioRxiv},
  year      = {2019},
  month     = jul,
  doi       = {10.1101/688390},
  url       = {https://www.biorxiv.org/content/10.1101/688390v1},
  publisher = {Cold Spring Harbor Laboratory},
  file      = {:C$\backslash$:/Users/Martin/AppData/Local/Mendeley Ltd./Mendeley Desktop/Downloaded/Jozwik et al. - 2019 - To find better neural network models of human vision, find better neural network models of primate vision.pdf:pdf},
  abstract  = {Specific deep artificial neural networks (ANNs) are the current best models of ventral visual processing and object recognition behavior in monkeys. We here explore whether models of non-human primate vision generalize to visual processing in the human primate brain. Specifically, we asked if model match to monkey IT is a predictor of model match to human IT, even when scoring those matches on different images. We found that the model match to monkey IT is a positive predictor of the model match to human IT (R = 0.36), and that this approach outperforms the current standard predictor of model accuracy on ImageNet. This suggests a more powerful approach for pre-selecting models as hypotheses of human brain processing.},
}
% Bashivan, Schrimpf et al. -- NeurIPS Continual Learning Workshop, 2018.
% Author names are written in the unambiguous "Last, First" form used by most
% of this file; the parsed name parts are unchanged.
@article{bashivan2018continual,
  author    = {Bashivan, Pouya and Schrimpf, Martin and Ajemian, Robert and Rish, Irina and Riemer, Matthew and Tu, Yuhai},
  author+an = {2=highlight},
  title     = {Continual Learning with Self-Organizing Maps},
  journal   = {Neural Information Processing Systems (NeurIPS) Continual Learning Workshop},
  year      = {2018},
}
% Arend, Han, Schrimpf et al. -- CBMM Memo, 2018.
% Author names are written in the unambiguous "Last, First" form used by most
% of this file, and DiCarlo's middle initial carries the period it has in the
% other entries.
@article{arend2018single,
  author    = {Arend, Luke and Han, Yena and Schrimpf, Martin and Bashivan, Pouya and Kar, Kohitij and Poggio, Tomaso and DiCarlo, James J. and Boix, Xavier},
  author+an = {3=highlight},
  title     = {Single units in a deep neural network functionally correspond with neurons in the brain: preliminary results},
  journal   = {CBMM Memo},
  year      = {2018},
}
% Schrimpf*, Kubilius* et al. -- Cosyne, 2019. Asterisks in the surnames are
% presentation markup.
@article{schrimpf2019cosyne,
  author    = {Schrimpf*, Martin and Kubilius*, Jonas and Hong, Ha and Majaj, Najib J. and Rajalingham, Rishi and Issa, Elias B. and Kar, Kohitij and Ziemba, Corey and Bashivan, Pouya and Prescott-Roy, Jonathan and Schmidt, Kailyn and Yamins, Daniel L. K. and DiCarlo, James J.},
  author+an = {1=highlight},
  title     = {Using Brain-Score to Evaluate and Build Neural Networks for Brain-Like Object Recognition},
  journal   = {Computational and Systems Neuroscience (Cosyne)},
  year      = {2019},
}
% Schrimpf*, Kubilius* et al. -- CCN, 2018. Asterisks in the surnames are
% presentation markup. The title contains a non-ASCII apostrophe.
@article{schrimpf2018ccn,
  author    = {Schrimpf*, Martin and Kubilius*, Jonas and Hong, Ha and Majaj, Najib J. and Rajalingham, Rishi and Issa, Elias B. and Kar, Kohitij and Bashivan, Pouya and Prescott-Roy, Jonathan and Schmidt, Kailyn and Yamins, Daniel L. K. and DiCarlo, James J.},
  author+an = {1=highlight},
  title     = {Brain-Score: Which Artificial Neural Network Best Emulates the Brain’s Neural Network?},
  journal   = {Cognitive Computational Neuroscience (CCN)},
  year      = {2018},
}
% Kubilius*, Schrimpf* et al. -- bioRxiv, 2018 (CORnet). Asterisks in the
% surnames are presentation markup.
@article{kubilius2018,
  author    = {Kubilius*, Jonas and Schrimpf*, Martin and Nayebi, Aran and Bear, Daniel and Yamins, Daniel L. K. and DiCarlo, James J.},
  author+an = {2=highlight},
  title     = {CORnet: Modeling the Neural Mechanisms of Core Object Recognition},
  journal   = {bioRxiv},
  year      = {2018},
  abstract  = {Deep artificial neural networks with spatially repeated processing (a.k.a., deep convolutional ANNs) have been established as the best class of candidate models of visual processing in primate ventral visual processing stream. Over the past five years, these ANNs have evolved from a simple feedforward eight-layer architecture in AlexNet to extremely deep and branching NASNet architectures, demonstrating increasingly better object categorization performance and increasingly better explanatory power of both neural and behavioral responses. However, from the neuroscientist{\textquoteright}s point of view, the relationship between such very deep architectures and the ventral visual pathway is incomplete in at least two ways. On the one hand, current state-of-the-art ANNs appear to be too complex (e.g., now over 100 levels) compared with the relatively shallow cortical hierarchy (4-8 levels), which makes it difficult to map their elements to those in the ventral visual stream and to understand what they are doing. On the other hand, current state-of-the-art ANNs appear to be not complex enough in that they lack recurrent connections and the resulting neural response dynamics that are commonplace in the ventral visual stream. Here we describe our ongoing efforts to resolve both of these issues by developing a "CORnet" family of deep neural network architectures. Rather than just seeking high object recognition performance (as the state-of-the-art ANNs above), we instead try to reduce the model family to its most important elements and then gradually build new ANNs with recurrent and skip connections while monitoring both performance and the match between each new CORnet model and a large body of primate brain and behavioral data. 
We report here that our current best ANN model derived from this approach (CORnet-S) is among the top models on Brain-Score, a composite benchmark for comparing models to the brain, but is simpler than other deep ANNs in terms of the number of convolutions performed along the longest path of information processing in the model. All CORnet models are available at https://github.com/dicarlolab/CORnet, and we plan to update this manuscript and the available models in this family as they are produced.},
}
% Schrimpf*, Kubilius* et al. -- bioRxiv, 2018 (Brain-Score). Asterisks in the
% surnames are presentation markup; the opt-prefixed fields are kept as-is.
% Fix: the abstract's "greater or equal" sign had been exported as the tabbing
% macro backslash-greater-than followed by "="; it is now a math-mode \geq.
@article{schrimpf2018b,
  author       = {Schrimpf*, Martin and Kubilius*, Jonas and Hong, Ha and Majaj, Najib J. and Rajalingham, Rishi and Issa, Elias B. and Kar, Kohitij and Bashivan, Pouya and Prescott-Roy, Jonathan and Schmidt, Kailyn and Yamins, Daniel L. K. and DiCarlo, James J.},
  author+an    = {1=highlight},
  title        = {Brain-Score: Which Artificial Neural Network for Object Recognition is most Brain-Like?},
  journal      = {bioRxiv},
  year         = {2018},
  optdoi       = {10.1101/407007},
  optURL       = {https://www.biorxiv.org/content/early/2018/09/05/407007},
  opteprint    = {https://www.biorxiv.org/content/early/2018/09/05/407007.full.pdf},
  optpublisher = {Cold Spring Harbor Laboratory},
  abstract     = {The internal representations of early deep artificial neural networks (ANNs) were found to be remarkably similar to the internal neural representations measured experimentally in the primate brain. Here we ask, as deep ANNs have continued to evolve, are they becoming more or less brain-like? ANNs that are most functionally similar to the brain will contain mechanisms that are most like those used by the brain. We therefore developed Brain-Score - a composite of multiple neural and behavioral benchmarks that score any ANN on how similar it is to the brain{\textquoteright}s mechanisms for core object recognition - and we deployed it to evaluate a wide range of state-of-the-art deep ANNs. Using this scoring system, we here report that: (1) DenseNet-169, CORnet-S and ResNet-101 are the most brain-like ANNs. (2) There remains considerable variability in neural and behavioral responses that is not predicted by any ANN, suggesting that no ANN model has yet captured all the relevant mechanisms. (3) Extending prior work, we found that gains in ANN ImageNet performance led to gains on Brain-Score. However, correlation weakened at $\geq$ 70\% top-1 ImageNet performance, suggesting that additional guidance from neuroscience is needed to make further advances in capturing brain mechanisms. (4) We uncovered smaller (i.e. less complex) ANNs that are more brain-like than many of the best-performing ImageNet models, which suggests the opportunity to simplify ANNs to better understand the ventral stream. The scoring system used here is far from complete. However, we propose that evaluating and tracking model-benchmark correspondences through a Brain-Score that is regularly updated with new brain data is an exciting opportunity: experimental benchmarks can be used to guide machine network evolution, and machine networks are mechanistic hypotheses of the brain{\textquoteright}s network and thus drive next experiments.
To facilitate both of these, we release Brain-Score.org: a platform that hosts the neural and behavioral benchmarks, where ANNs for visual processing can be submitted to receive a Brain-Score and their rank relative to other models, and where new experimental data can be naturally incorporated.},
}
% Boix*, Schrimpf* et al. -- listed as "submitted", 2018. Asterisks in the
% surnames are presentation markup.
% The author field previously mixed "First Last" and "Last, First" forms; all
% names now use the comma form (parsed name parts are unchanged).
@article{boix2018,
  author    = {Boix*, Xavier and Schrimpf*, Martin and Arend, Luke and Poggio, Tomaso and Kreiman, Gabriel},
  author+an = {2=highlight},
  title     = {Redundancy Emerges in Overparametrized Deep Neural Networks},
  journal   = {submitted},
  year      = {2018},
}
% Schrimpf*, Merity*, Socher -- ICLR, 2018. Asterisks in the surnames are
% presentation markup.
% The "(Workshop Track)" remark used to follow a percent sign inside the entry.
% BibTeX has no in-entry comment syntax, so the remark now lives in an ignored
% opt-prefixed field, matching the other opt-prefixed fields in this file.
@article{schrimpf2018,
  author    = {Schrimpf*, Martin and Merity*, Stephen and Socher, Richard},
  author+an = {1=highlight},
  title     = {A Flexible Approach to Automated RNN Architecture Generation},
  journal   = {International Conference on Learning Representations (ICLR)},
  optnote   = {(Workshop Track)},
  year      = {2018},
  url       = {https://arxiv.org/abs/1712.07316},
}
% Cheney*, Schrimpf*, Kreiman -- CBMM Memo, 2017. Asterisks in the surnames are
% presentation markup.
% Author names use the "Last, First" form found in most of this file (parsed
% name parts are unchanged); the abstract is brace-delimited like the others.
@article{cheney2017robustness,
  author    = {Cheney*, Nicholas and Schrimpf*, Martin and Kreiman, Gabriel},
  author+an = {2=highlight},
  title     = {On the Robustness of Convolutional Neural Networks to Internal Architecture and Weight Perturbations},
  journal   = {CBMM Memo},
  year      = {2017},
  abstract  = {Deep convolutional neural networks are generally regarded as robust function approximators. So far, this intuition is based on perturbations to external stimuli such as the images to be classified. Here we explore the robustness of convolutional neural networks to perturbations to the internal weights and architecture of the network itself. We show that convolutional networks are surprisingly robust to a number of internal perturbations in the higher convolutional layers but the bottom convolutional layers are much more fragile. For instance, Alexnet shows less than a 30\% decrease in classification performance when randomly removing over 70\% of weight connections in the top convolutional or dense layers but performance is almost at chance with the same perturbation in the first convolutional layer. Finally, we suggest further investigations which could continue to inform the robustness of convolutional networks to internal perturbations.},
}
% Tang*, Schrimpf*, Lotter* et al. -- PNAS, 2018. Asterisks in the surnames are
% presentation markup; the opt-prefixed fields are kept as-is.
% Fixes in the abstract: the "less than" sign had been exported as a
% tabbing-only macro and is now a math-mode "<"; a missing space between the
% significance statement and the abstract proper was restored.
@article{TangSchrimpfLotter2018,
  author    = {Tang*, Hanlin and Schrimpf*, Martin and Lotter*, William and Moerman, Charlotte and Paredes, Ana and Ortega Caro, Josue and Hardesty, Walter and Cox, David and Kreiman, Gabriel},
  author+an = {2=highlight},
  title     = {Recurrent computations for visual pattern completion},
  journal   = {Proceedings of the National Academy of Sciences (PNAS)},
  year      = {2018},
  publisher = {National Academy of Sciences},
  optdoi    = {10.1073/pnas.1719397115},
  optissn   = {0027-8424},
  optURL    = {http://www.pnas.org/content/early/2018/08/07/1719397115},
  opteprint = {http://www.pnas.org/content/early/2018/08/07/1719397115.full.pdf},
  abstract  = {The ability to complete patterns and interpret partial information is a central property of intelligence. Deep convolutional network architectures have proved successful in labeling whole objects in images and capturing the initial 150 ms of processing along the ventral visual cortex. This study shows that human object recognition abilities remain robust when only small amounts of information are available due to heavy occlusion, but the performance of bottom-up computational models is impaired under limited visibility. The results provide combined behavioral, neurophysiological, and modeling insights showing how recurrent computations may help the brain solve the fundamental challenge of pattern completion. Making inferences from partial information constitutes a critical aspect of cognition. During visual perception, pattern completion enables recognition of poorly visible or occluded objects. We combined psychophysics, physiology, and computational models to test the hypothesis that pattern completion is implemented by recurrent computations and present three pieces of evidence that are consistent with this hypothesis. First, subjects robustly recognized objects even when they were rendered $<$15\% visible, but recognition was largely impaired when processing was interrupted by backward masking. Second, invasive physiological responses along the human ventral cortex exhibited visually selective responses to partially visible objects that were delayed compared with whole objects, suggesting the need for additional computations. These physiological delays were correlated with the effects of backward masking. Third, state-of-the-art feed-forward computational architectures were not robust to partial visibility. However, recognition performance was recovered when the model was augmented with attractor-based recurrent connectivity.
The recurrent model was able to predict which images of heavily occluded objects were easier or harder for humans to recognize, could capture the effect of introducing a backward mask on recognition behavior, and was consistent with the physiological delays along the human ventral visual stream. These results provide a strong argument of plausibility for the role of recurrent computations in making visual inferences from partial information.},
}
% Schrimpf, Tang, Lotter et al. -- NIPS Brains and Bits Workshop, 2016.
@article{schrimpf2016,
  author    = {Schrimpf, Martin and Tang, Hanlin and Lotter, William and Paredes, Ana and Ortega Caro, Josue and Hardesty, Walter and Cox, David and Kreiman, Gabriel},
  author+an = {1=highlight},
  title     = {Recurrent computations for pattern completion},
  journal   = {Neural Information Processing Systems (NIPS) Brains and Bits Workshop},
  year      = {2016},
}
% Seminar paper, University of Augsburg, 2016 (arXiv 1611.08903).
% The author is written in the "Last, First" form found in most of this file
% (parsed name parts are unchanged); values are brace-delimited like the
% majority of entries.
@manual{should_i_use_tensorflow,
  author        = {Schrimpf, Martin},
  title         = {Should I use Tensorflow},
  organization  = {University of Augsburg},
  note          = {Seminar Paper},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1611.08903},
  primaryClass  = {cs.AI},
  url           = {https://arxiv.org/abs/1611.08903},
  abstract      = {Google's Machine Learning framework TensorFlow was open-sourced in November 2015 and has since built a growing community around it. TensorFlow is supposed to be flexible for research purposes while also allowing its models to be deployed productively. This work is aimed towards people with experience in Machine Learning considering whether they should use TensorFlow in their environment. Several aspects of the framework important for such a decision are examined, such as the heterogenity, extensibility and its computation graph. A pure Python implementation of linear classification is compared with an implementation utilizing TensorFlow. I also contrast TensorFlow to other popular frameworks with respect to modeling capability, deployment and performance and give a brief description of the current adaption of the framework.},
}
% Bachelor's thesis, Technical University Munich, 2014. The entry type is the
% master's-thesis type with the printed label overridden through the type field.
% Fix: the two percent signs in the abstract are now escaped. Unescaped, they
% start a LaTeX comment and swallow the rest of the line once the field
% reaches the typesetter. Every other abstract in this file already escapes them.
@MastersThesis{bachelors_thesis,
  type     = {Bachelor's Thesis},
  author   = {Schrimpf, Martin},
  title    = {Scalable Database Concurrency Control using Transactional Memory},
  school   = {Technical University Munich},
  year     = {2014},
  url      = {http://mschrimpf.com/wp-content/uploads/2016/11/Scalable-Database-Concurrency-Control-using-Transactional-Memory-Martin-Schrimpf-TextSigned.pdf},
  abstract = {Intel recently made available the optimistic synchronization technique Hardware Transactional
Memory (HTM) in their mainstream Haswell processor microarchitecture.
The first part of this work evaluates the core performance characteristics of the two programming
interfaces within Intel’s Transactional Synchronization Extensions (TSX), Hardware Lock Elision
(HLE) and Restricted Transactional Memory (RTM). Therein, a scope of application is defined
regarding inter alia the transaction size which is limited to the L1 DCache or even less with wrongly
aligned data due to cache associativity, the transaction duration restricted by Hardware interrupts
and a limit to the nesting of transactions. By comparing common data structures and analyzing
the behavior of HTM using hardware counters, the Hashmap is identified as a suitable structure
with a 134\% speedup compared to classical POSIX mutexes.
In the second part, several latching mechanisms of MySQL InnoDB’s Concurrency Control are
selected and modified with different implementations of HTM to achieve increased scalability. We
find that it does not suffice to apply HTM naively to all mutex calls by using either HLE prefixes or
an HTM-enabled glibc. Furthermore, many transactional cycles often come at the price of frequent
aborted cycles which inhibits performance increases when measuring MySQL with the tx-bench
and too many aborts can even decrease the throughput to 29\% of the unmodified version.},
}