bib.bib

@inproceedings{soricut2015unsupervised,
	author = {Soricut, Radu and Och, Franz Josef},
	title = {Unsupervised Morphology Induction Using Word Embeddings},
	booktitle = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
	pages = {1627--1637},
	year = {2015}
}

@inproceedings{luong2015effective,
	author = {Luong, Minh-Thang and Pham, Hieu and Manning, Christopher D.},
	title = {Effective Approaches to Attention-based Neural Machine Translation},
	booktitle = {Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing},
	pages = {1412--1421},
	year = {2015}
}


@inproceedings{gehring2017convolutional,
	author    = {Gehring, Jonas and Auli, Michael and Grangier, David and Yarats, Denis and Dauphin, Yann N},
	title     = {Convolutional Sequence to Sequence Learning},
	booktitle = {International Conference on Machine Learning},
	year      = {2017},
	pages     = {1243--1252},
}


@article{gulcehre2015using,
	author = {Gulcehre, Caglar and Firat, Orhan and Xu, Kelvin and Cho, Kyunghyun and Barrault, Loic and Lin, Huei-Chi and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
	title = {On using monolingual corpora in neural machine translation},
	journal = {CoRR},
	volume = {abs/1503.03535},
	year = {2015}
}


@article{johnson2017google,
	author = {Johnson, Melvin and Schuster, Mike and Le, Quoc V. and Krikun, Maxim and Wu, Yonghui and Chen, Zhifeng and Thorat, Nikhil and Vi{\'e}gas, Fernanda and Wattenberg, Martin and Corrado, Greg and others},
	title = {{Google}'s Multilingual Neural Machine Translation System: Enabling Zero-Shot Translation},
	journal = {Transactions of the Association for Computational Linguistics},
	volume = {5},
	number = {1},
	pages = {339--351},
	year = {2017}
}

@inproceedings{delbrouck2017visually,
	author = {Delbrouck, Jean-Benoit and Dupont, St{\'e}phane and Seddati, Omar},
	title = {Visually Grounded Word Embeddings and Richer Visual Features for Improving Multimodal Neural Machine Translation},
	booktitle = {Proceedings of the GLU 2017 International Workshop on Grounding Language Understanding},
	pages = {62--67},
	year = {2017}
}
@inproceedings{garcia2015document,
	author = {Mart{\'\i}nez Garcia, Eva and Espa{\~n}a-Bonet, Cristina and M{\`a}rquez, Llu{\'\i}s},
	title = {Document-Level Machine Translation with Word Vector Models},
	booktitle = {Proceedings of the 18th Annual Conference of the European Association for Machine Translation},
	year = {2015}
}


@book{national1966language,
	author = {{ALPAC}},
	title = {Language and Machines: Computers in Translation and Linguistics; a Report},
	publisher = {National Academy of Sciences, National Research Council},
	address = {Washington, DC},
	year = {1966},
	note = {Publication 1416}
}


@article{evans2009myth,
	author = {Evans, Nicholas and Levinson, Stephen C.},
	title = {The myth of language universals: Language diversity and its importance for cognitive science},
	journal = {Behavioral and Brain Sciences},
	volume = {32},
	number = {5},
	pages = {429--448},
	year = {2009},
	publisher = {Cambridge University Press}
}

@unpublished{koehn2017statistical,
	author = {Koehn, Philipp},
	title = {Statistical Machine Translation},
	note = {Draft of Chapter 13},
	year = {2017}
}

@phdthesis{luong2016neural,
	author = {Luong, Minh-Thang},
	title  = {Neural Machine Translation},
	school = {Stanford University},
	year   = {2016},
}


@article{koehn2017neural,
	author  = {Koehn, Philipp},
	title   = {Neural Machine Translation},
	journal = {arXiv preprint arXiv:1709.07809},
	year    = {2017},
}

@inproceedings{koehn2017six,
	author    = {Koehn, Philipp and Knowles, Rebecca},
	title     = {Six Challenges for Neural Machine Translation},
	booktitle = {Proceedings of the First Workshop on Neural Machine Translation},
	year      = {2017},
	pages     = {28--39},
}


@book{russell2002artificial,
	author = {Russell, Stuart J. and Norvig, Peter},
	title = {Artificial Intelligence: A Modern Approach},
	publisher = {Pearson},
	year = {2002},
	note = {International Edition}
}

@inproceedings{sutskever2014sequence,
	author = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V.},
	title = {Sequence to sequence learning with neural networks},
	booktitle = {Advances in Neural Information Processing Systems},
	pages = {3104--3112},
	year = {2014}
}

@inproceedings{botha2014compositional,
	author    = {Botha, Jan and Blunsom, Phil},
	title     = {Compositional morphology for word representations and language modelling},
	booktitle = {International Conference on Machine Learning},
	year      = {2014},
	pages     = {1899--1907},
}

@inproceedings{luong2013better,
	author = {Luong, Minh-Thang and Socher, Richard and Manning, Christopher D.},
	title = {Better Word Representations with Recursive Neural Networks for Morphology},
	booktitle = {Proceedings of the Seventeenth Conference on Computational Natural Language Learning},
	pages = {104--113},
	year = {2013}
}

@inproceedings{bhatia2016morphological,
	author = {Bhatia, Parminder and Guthrie, Robert and Eisenstein, Jacob},
	title = {Morphological priors for probabilistic neural word embeddings},
	booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing},
	year = {2016}
}


@inproceedings{kim2016character,
	author = {Kim, Yoon and Jernite, Yacine and Sontag, David and Rush, Alexander M.},
	title = {Character-Aware Neural Language Models},
	booktitle = {Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence},
	pages = {2741--2749},
	year = {2016}
}

@inproceedings{wieting2016charagram,
	author = {Wieting, John and Bansal, Mohit and Gimpel, Kevin and Livescu, Karen},
	title = {Charagram: Embedding words and sentences via character n-grams},
	booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing},
	year = {2016}
}

@inproceedings{jean2014using,
	author = {Jean, S{\'e}bastien and Cho, Kyunghyun and Memisevic, Roland and Bengio, Yoshua},
	title = {On using very large target vocabulary for neural machine translation},
	booktitle = {Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
	pages = {1--10},
	year = {2015}
}

@article{choi2017context,
	author    = {Choi, Heeyoul and Cho, Kyunghyun and Bengio, Yoshua},
	title     = {Context-dependent word representation for neural machine translation},
	journal   = {Computer Speech \& Language},
	publisher = {Elsevier},
	year      = {2017},
	volume    = {45},
	pages     = {149--160},
}

@article{luong2016achieving,
	author = {Luong, Minh-Thang and Manning, Christopher D.},
	title = {Achieving open vocabulary neural machine translation with hybrid word-character models},
	journal = {CoRR},
	volume = {abs/1604.00788},
	year = {2016}
}


@inproceedings{cho2014properties,
	author = {Cho, Kyunghyun and Van Merri{\"e}nboer, Bart and Bahdanau, Dzmitry and Bengio, Yoshua},
	title = {On the properties of neural machine translation: Encoder-decoder approaches},
	booktitle = {Proceedings of SSST-8, Eighth Workshop on Syntax, Semantics and Structure in Statistical Translation},
	pages = {103--111},
	year = {2014}
}

@inproceedings{cho2014learning,
	author = {Cho, Kyunghyun and Van Merri{\"e}nboer, Bart and Gulcehre, Caglar and Bahdanau, Dzmitry and Bougares, Fethi and Schwenk, Holger and Bengio, Yoshua},
	title = {Learning phrase representations using {RNN} encoder-decoder for statistical machine translation},
	booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing},
	pages = {1724--1734},
	year = {2014}
}

@inproceedings{bahdanau2014neural,
	author = {Bahdanau, Dzmitry and Cho, Kyunghyun and Bengio, Yoshua},
	title = {Neural machine translation by jointly learning to align and translate},
	booktitle = {International Conference on Learning Representations},
	year = {2015}
}

@inproceedings{koehn2003statistical,
	author = {Koehn, Philipp and Och, Franz Josef and Marcu, Daniel},
	title = {Statistical phrase-based translation},
	booktitle = {Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology},
	volume = {1},
	pages = {48--54},
	year = {2003},
	organization = {Association for Computational Linguistics}
}


@inproceedings{kalchbrenner2013recurrent,
	author = {Kalchbrenner, Nal and Blunsom, Phil},
	title = {Recurrent Continuous Translation Models},
	booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
	pages = {1700--1709},
	year = {2013}
}

@article{graves2013generating,
	author  = {Graves, Alex},
	title   = {Generating sequences with recurrent neural networks},
	journal = {arXiv preprint arXiv:1308.0850},
	year    = {2013},
}


@article{bengio2003neural,
	author = {Bengio, Yoshua and Ducharme, R{\'e}jean and Vincent, Pascal and Jauvin, Christian},
	title = {A neural probabilistic language model},
	journal = {Journal of Machine Learning Research},
	volume = {3},
	number = {Feb},
	pages = {1137--1155},
	year = {2003}
}

@inproceedings{mikolov2013distributed,
	author = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg S. and Dean, Jeff},
	title = {Distributed representations of words and phrases and their compositionality},
	booktitle = {Advances in Neural Information Processing Systems},
	pages = {3111--3119},
	year = {2013}
}

@article{mikolov2013efficient,
	author  = {Mikolov, Tomas and Chen, Kai and Corrado, Greg and Dean, Jeffrey},
	title   = {Efficient estimation of word representations in vector space},
	journal = {arXiv preprint arXiv:1301.3781},
	year    = {2013},
}

@inproceedings{pennington2014glove,
	author = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher D.},
	title = {{GloVe}: Global Vectors for Word Representation},
	booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)},
	pages = {1532--1543},
	year = {2014}
}

@book{rojas2013neural,
	author    = {Rojas, Ra{\'u}l},
	title     = {Neural networks: a systematic introduction},
	publisher = {Springer Science \& Business Media},
	year      = {2013},
}


@inproceedings{bahar2017rwth,
	author = {Bahar, Parnia and Rosendahl, Jan and Rossenbach, Nick and Ney, Hermann},
	title = {The {RWTH Aachen} Machine Translation Systems for {IWSLT} 2017},
	booktitle = {Proceedings of the 14th International Workshop on Spoken Language Translation},
	pages = {29--34},
	year = {2017}
}

@article{bojanowski2016enriching,
	author = {Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas},
	title = {Enriching word vectors with subword information},
	journal = {Transactions of the Association for Computational Linguistics},
	volume = {5},
	pages = {135--146},
	year = {2017}
}

@inproceedings{luong2015addressing,
	author = {Luong, Minh-Thang and Sutskever, Ilya and Le, Quoc V. and Vinyals, Oriol and Zaremba, Wojciech},
	title = {Addressing the rare word problem in neural machine translation},
	booktitle = {Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
	pages = {11--19},
	year = {2015}
}

@inproceedings{zesch2006automatically,
	author       = {Zesch, Torsten and Gurevych, Iryna},
	title        = {Automatically creating datasets for measures of semantic relatedness},
	booktitle    = {Proceedings of the Workshop on Linguistic Distances},
	organization = {Association for Computational Linguistics},
	year         = {2006},
	pages        = {16--24},
}


@comment{
Disabled duplicate: the key koehn2003statistical is already defined by an
entry earlier in this file, and BibTeX reports a repeated-entry error for a
second definition of the same key. The fields are kept below for reference
only (the leading at-sign was removed so the text is not parsed as an entry).

inproceedings{koehn2003statistical,
title={Statistical phrase-based translation},
author={Koehn, Philipp and Och, Franz Josef and Marcu, Daniel},
booktitle={Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology-Volume 1},
pages={48--54},
year={2003},
organization={Association for Computational Linguistics}
}
}

@article{brown1993mathematics,
	author = {Brown, Peter F. and Della Pietra, Vincent J. and Della Pietra, Stephen A. and Mercer, Robert L.},
	title = {The mathematics of statistical machine translation: Parameter estimation},
	journal = {Computational Linguistics},
	volume = {19},
	number = {2},
	pages = {263--311},
	year = {1993},
	publisher = {MIT Press}
}

@phdthesis{och2002statistical,
	author = {Och, Franz Josef},
	title = {Statistical machine translation: from single-word models to alignment templates},
	school = {RWTH Aachen University},
	year = {2002}
}

@inproceedings{galley2006scalable,
	author = {Galley, Michel and Graehl, Jonathan and Knight, Kevin and Marcu, Daniel and DeNeefe, Steve and Wang, Wei and Thayer, Ignacio},
	title = {Scalable inference and training of context-rich syntactic translation models},
	booktitle = {Proceedings of the 21st International Conference on Computational Linguistics and the 44th Annual Meeting of the Association for Computational Linguistics},
	pages = {961--968},
	year = {2006},
	organization = {Association for Computational Linguistics}
}

@techreport{galley2004s,
	author = {Galley, Michel and Hopkins, Mark and Knight, Kevin and Marcu, Daniel},
	title = {What's in a translation rule?},
	institution = {Columbia University, Department of Computer Science},
	year = {2004}
}

@article{chiang2007hierarchical,
	author = {Chiang, David},
	title = {Hierarchical phrase-based translation},
	journal = {Computational Linguistics},
	volume = {33},
	number = {2},
	pages = {201--228},
	year = {2007},
	publisher = {MIT Press}
}


@inproceedings{marcu2002phrase,
	author = {Marcu, Daniel and Wong, William},
	title = {A phrase-based, joint probability model for statistical machine translation},
	booktitle = {Proceedings of the ACL-02 Conference on Empirical Methods in Natural Language Processing},
	volume = {10},
	pages = {133--139},
	year = {2002},
	organization = {Association for Computational Linguistics}
}


@inproceedings{bojarfindings,
	author = {Bojar, Ond{\v{r}}ej and Buck, Christian and Callison-Burch, Chris and Federmann, Christian and Haddow, Barry and Koehn, Philipp and Monz, Christof and Post, Matt and Soricut, Radu and Specia, Lucia},
	title = {Findings of the 2013 {Workshop on Statistical Machine Translation}},
	booktitle = {Proceedings of the Eighth Workshop on Statistical Machine Translation},
	pages = {1--44},
	year = {2013}
}

@inproceedings{akhtar2017word,
	author = {Akhtar, Syed Sarfaraz and Gupta, Arihant and Vajpayee, Avijit and Srivastava, Arjit and Shrivastava, Manish},
	title = {Word Similarity Datasets for {Indian} Languages: Annotation and Baseline Systems},
	booktitle = {Proceedings of the 11th Linguistic Annotation Workshop},
	pages = {91--94},
	year = {2017}
}

@comment{
Disabled duplicate: the key luong2013better is already defined by an entry
earlier in this file, and BibTeX reports a repeated-entry error for a second
definition of the same key. The fields are kept below for reference only
(the leading at-sign was removed so the text is not parsed as an entry).

inproceedings{luong2013better,
	title={Better word representations with recursive neural networks for morphology.},
	author={Luong, Thang and Socher, Richard and Manning, Christopher D},
	booktitle={CoNLL},
	pages={104--113},
	year={2013}
}
}

@inproceedings{sanu2017word,
	author    = {Sanu, Joseph and Xu, Mingbin and Jiang, Hui and Liu, Quan},
	title     = {Word Embeddings based on Fixed-Size Ordinally Forgetting Encoding},
	booktitle = {Proceedings of the 2017 Conference on Empirical Methods in Natural Language Processing},
	year      = {2017},
	pages     = {310--315},
}

@inproceedings{sun2016inside,
	author = {Sun, Fei and Guo, Jiafeng and Lan, Yanyan and Xu, Jun and Cheng, Xueqi},
	title = {Inside Out: Two Jointly Predictive Models for Word Representations and Phrase Representations},
	booktitle = {Proceedings of the Thirtieth AAAI Conference on Artificial Intelligence},
	pages = {2821--2827},
	year = {2016}
}

@inproceedings{barkan2017bayesian,
	author = {Barkan, Oren},
	title = {{Bayesian} Neural Word Embedding},
	booktitle = {Proceedings of the Thirty-First AAAI Conference on Artificial Intelligence},
	pages = {3135--3143},
	year = {2017}
}

@inproceedings{sennrich2016improving,
	author = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
	title = {Improving Neural Machine Translation Models with Monolingual Data},
	booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
	pages = {86--96},
	year = {2016}
}

@inproceedings{sennrich2015neural,
	author = {Sennrich, Rico and Haddow, Barry and Birch, Alexandra},
	title = {Neural machine translation of rare words with subword units},
	booktitle = {Proceedings of the 54th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
	pages = {1715--1725},
	year = {2016}
}

@inproceedings{schwenk2012continuous,
	author = {Schwenk, Holger},
	title = {Continuous Space Translation Models for Phrase-Based Statistical Machine Translation},
	booktitle = {Proceedings of COLING 2012: Posters},
	pages = {1071--1080},
	year = {2012}
}

@inproceedings{zou2013bilingual,
	author    = {Zou, Will Y and Socher, Richard and Cer, Daniel and Manning, Christopher D},
	title     = {Bilingual word embeddings for phrase-based machine translation},
	booktitle = {Proceedings of the 2013 Conference on Empirical Methods in Natural Language Processing},
	year      = {2013},
	pages     = {1393--1398},
}


@article{hornik1989multilayer,
	author = {Hornik, Kurt and Stinchcombe, Maxwell and White, Halbert},
	title = {Multilayer feedforward networks are universal approximators},
	journal = {Neural Networks},
	volume = {2},
	number = {5},
	pages = {359--366},
	year = {1989},
	publisher = {Elsevier}
}

@article{rumelhart1988learning,
	author = {Rumelhart, David E. and Hinton, Geoffrey E. and Williams, Ronald J.},
	title = {Learning representations by back-propagating errors},
	journal = {Nature},
	volume = {323},
	number = {6088},
	pages = {533--536},
	year = {1986}
}

@article{goldberg2016primer,
	author  = {Goldberg, Yoav},
	title   = {A Primer on Neural Network Models for Natural Language Processing},
	journal = {Journal of Artificial Intelligence Research},
	year    = {2016},
	volume  = {57},
	pages   = {345--420},
}

@article{elman1990finding,
	author = {Elman, Jeffrey L.},
	title = {Finding structure in time},
	journal = {Cognitive Science},
	volume = {14},
	number = {2},
	pages = {179--211},
	year = {1990},
	publisher = {Wiley Online Library}
}

@article{pedregosa2011scikit,
	author = {Pedregosa, Fabian and Varoquaux, Ga{\"e}l and Gramfort, Alexandre and Michel, Vincent and Thirion, Bertrand and Grisel, Olivier and Blondel, Mathieu and Prettenhofer, Peter and Weiss, Ron and Dubourg, Vincent and others},
	title = {Scikit-learn: Machine learning in {Python}},
	journal = {Journal of Machine Learning Research},
	volume = {12},
	number = {Oct},
	pages = {2825--2830},
	year = {2011}
}


@article{hochreiter1997long,
	author = {Hochreiter, Sepp and Schmidhuber, J{\"u}rgen},
	title = {Long short-term memory},
	journal = {Neural Computation},
	volume = {9},
	number = {8},
	pages = {1735--1780},
	year = {1997},
	publisher = {MIT Press}
}


@misc{farizrahman4u,
	author={Fariz Rahman},
	title={seq2seq},
	howpublished={\url{https://github.com/farizrahman4u/seq2seq}},
	journal={GitHub repository},
	publisher={GitHub},
	year={2017}
}


@article{gage1994new,
	author    = {Gage, Philip},
	title     = {A new algorithm for data compression},
	journal   = {The C Users Journal},
	publisher = {R \& D Publications, Inc.},
	year      = {1994},
	volume    = {12},
	number    = {2},
	pages     = {23--38},
}


@inproceedings{kumar2016ask,
	author    = {Kumar, Ankit and Irsoy, Ozan and Ondruska, Peter and Iyyer, Mohit and Bradbury, James and Gulrajani, Ishaan and Zhong, Victor and Paulus, Romain and Socher, Richard},
	title     = {Ask me anything: Dynamic memory networks for natural language processing},
	booktitle = {International Conference on Machine Learning},
	year      = {2016},
	pages     = {1378--1387},
}

@inproceedings{vaswani2017attention,
	author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N and Kaiser, {\L}ukasz and Polosukhin, Illia},
	title     = {Attention is all you need},
	booktitle = {Advances in Neural Information Processing Systems},
	year      = {2017},
	pages     = {6000--6010},
}


@inproceedings{papineni2002bleu,
	author = {Papineni, Kishore and Roukos, Salim and Ward, Todd and Zhu, Wei-Jing},
	title = {{BLEU}: a method for automatic evaluation of machine translation},
	booktitle = {Proceedings of the 40th Annual Meeting of the Association for Computational Linguistics},
	pages = {311--318},
	year = {2002},
	organization = {Association for Computational Linguistics}
}


@article{ataman2017linguistically,
	author = {Ataman, Duygu and Negri, Matteo and Turchi, Marco and Federico, Marcello},
	title = {Linguistically motivated vocabulary reduction for neural machine translation from {Turkish} to {English}},
	journal = {The Prague Bulletin of Mathematical Linguistics},
	volume = {108},
	number = {1},
	pages = {331--342},
	year = {2017},
	publisher = {De Gruyter Open}
}


@inproceedings{arthur2016incorporating,
	author    = {Arthur, Philip and Neubig, Graham and Nakamura, Satoshi},
	title     = {Incorporating Discrete Translation Lexicons into Neural Machine Translation},
	booktitle = {Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing},
	year      = {2016},
	pages     = {1557--1567},
}


@inproceedings{collobert2008unified,
	author = {Collobert, Ronan and Weston, Jason},
	title = {A unified architecture for natural language processing: Deep neural networks with multitask learning},
	booktitle = {Proceedings of the 25th International Conference on Machine Learning},
	pages = {160--167},
	year = {2008},
	organization = {ACM}
}


@inproceedings{cettolo2012wit3,
	author = {Cettolo, Mauro and Girardi, Christian and Federico, Marcello},
	title = {{WIT$^3$}: Web Inventory of Transcribed and Translated Talks},
	booktitle = {Conference of European Association for Machine Translation},
	pages = {261--268},
	year = {2012}
}


@inproceedings{cettolo2014report,
	author = {Cettolo, Mauro and Niehues, Jan and St{\"u}ker, Sebastian and Bentivogli, Luisa and Federico, Marcello},
	title = {Report on the 11th {IWSLT} evaluation campaign, {IWSLT} 2014},
	booktitle = {Proceedings of the International Workshop on Spoken Language Translation, Hanoi, Vietnam},
	year = {2014}
}


@inproceedings{ranzato2015sequence,
	author = {Ranzato, Marc'Aurelio and Chopra, Sumit and Auli, Michael and Zaremba, Wojciech},
	title = {Sequence level training with recurrent neural networks},
	booktitle = {International Conference on Learning Representations},
	year = {2016}
}

@article{spearman1904proof,
	author = {Spearman, Charles},
	title = {The proof and measurement of association between two things},
	journal = {The American Journal of Psychology},
	volume = {15},
	number = {1},
	pages = {72--101},
	year = {1904},
	publisher = {JSTOR}
}