Deep Learning bibliography

2013

  • J. Stückler and S. Behnke, “Hierarchical Object Discovery and Dense Modelling From Motion Cues in RGB-D Video,” in IJCAI, 2013.
    [BibTeX]
    @inproceedings{Behnke2003,
      author    = {St{\"u}ckler, J{\"o}rg and Behnke, Sven},
      title     = {Hierarchical Object Discovery and Dense Modelling From Motion Cues in {RGB-D} Video},
      booktitle = {IJCAI},
      year      = {2013},
      url       = {http://www.aaai.org/ocs/index.php/IJCAI/IJCAI13/paper/view/6977},
    }

  • Y. Bengio, “Deep Learning of Representations: Looking Forward,” arXiv:1305.0445v2, 2013.
    [BibTeX]
    @misc{Bengio2013,
      author        = {Bengio, Yoshua},
      title         = {Deep Learning of Representations: Looking Forward},
      year          = {2013},
      archivePrefix = {arXiv},
      eprint        = {1305.0445v2},
      arxivId       = {arXiv:1305.0445v2}
    }

  • Y. Bengio, A. Courville, and P. Vincent, “Representation Learning: A Review and New Perspectives,” IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 35, iss. 8, pp. 1798-1828, 2013. doi:10.1109/TPAMI.2013.50
    [BibTeX]
    @article{Bengio2013a,
      author  = {Bengio, Yoshua and Courville, Aaron and Vincent, Pascal},
      title   = {Representation Learning: A Review and New Perspectives},
      journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
      volume  = {35},
      number  = {8},
      pages   = {1798--1828},
      month   = aug,
      year    = {2013},
      issn    = {1939-3539},
      doi     = {10.1109/TPAMI.2013.50},
      pmid    = {23787338}
    }

  • Y. Bengio and A. Courville, “Deep Learning of Representations,” in Handbook on Neural Information Processing, Springer, 2013, pp. 1-28.
    [BibTeX]
    @incollection{bengio2013deep,
      author    = {Bengio, Yoshua and Courville, Aaron},
      title     = {Deep Learning of Representations},
      booktitle = {Handbook on Neural Information Processing},
      publisher = {Springer},
      year      = {2013},
      pages     = {1--28}
    }

  • L. Bo, X. Ren, and D. Fox, “Multipath Sparse Coding Using Hierarchical Matching Pursuit,” 2013 IEEE Conference on Computer Vision and Pattern Recognition, pp. 660-667, 2013. doi:10.1109/CVPR.2013.91
    [BibTeX]
    @inproceedings{Bo2013,
      author    = {Bo, Liefeng and Ren, Xiaofeng and Fox, Dieter},
      title     = {Multipath Sparse Coding Using Hierarchical Matching Pursuit},
      booktitle = {2013 IEEE Conference on Computer Vision and Pattern Recognition},
      pages     = {660--667},
      month     = jun,
      year      = {2013},
      publisher = {IEEE},
      isbn      = {978-0-7695-4989-7},
      doi       = {10.1109/CVPR.2013.91}
    }

  • L. Bo and D. Fox, “Attribute based object identification,” 2013 IEEE International Conference on Robotics and Automation, pp. 2096-2103, 2013. doi:10.1109/ICRA.2013.6630858
    [BibTeX]
    @inproceedings{Bo2013a,
      author    = {Bo, Liefeng and Fox, Dieter},
      title     = {Attribute based object identification},
      booktitle = {2013 IEEE International Conference on Robotics and Automation},
      pages     = {2096--2103},
      month     = may,
      year      = {2013},
      publisher = {IEEE},
      isbn      = {978-1-4673-5643-5},
      doi       = {10.1109/ICRA.2013.6630858}
    }

  • D. Cireşan and J. Schmidhuber, “Multi-Column Deep Neural Networks for Offline Handwritten Chinese Character Classification,” 2013.
    [BibTeX]
    @misc{Ciresan2013,
      author        = {Cire{\c{s}}an, Dan and Schmidhuber, J{\"u}rgen},
      title         = {Multi-Column Deep Neural Networks for Offline Handwritten {Chinese} Character Classification},
      year          = {2013},
      archivePrefix = {arXiv},
      eprint        = {1309.0261v1},
      arxivId       = {arXiv:1309.0261v1}
    }

  • D. Cireşan, A. Giusti, L. M. Gambardella, and J. Schmidhuber, “Mitosis Detection in Breast Cancer Histology Images with Deep Neural Networks,” in Medical Image Computing and Computer Assisted Intervention (MICCAI’2013), 2013.
    [BibTeX]
    @inproceedings{Ciresan2013a,
      author    = {Cire{\c{s}}an, Dan and Giusti, Alessandro and Gambardella, Luca Maria and Schmidhuber, J{\"u}rgen},
      title     = {Mitosis Detection in Breast Cancer Histology Images with Deep Neural Networks},
      booktitle = {Medical Image Computing and Computer Assisted Intervention ({MICCAI}'2013)},
      month     = sep,
      year      = {2013}
    }

  • A. Coates, B. Huval, T. Wang, D. Wu, B. Catanzaro, and A. Ng, “Deep learning with COTS HPC systems,” in Proceedings of the 30th International Conference on Machine Learning (ICML-13), 2013, pp. 1337-1345.
    [BibTeX]
    @inproceedings{coates2013deep,
      author    = {Coates, Adam and Huval, Brody and Wang, Tao and Wu, David and Catanzaro, Bryan and Ng, Andrew},
      title     = {Deep learning with {COTS HPC} systems},
      booktitle = {Proceedings of the 30th International Conference on Machine Learning ({ICML}-13)},
      pages     = {1337--1345},
      year      = {2013}
    }

  • A. C. Damianou and N. D. Lawrence, “Deep Gaussian Processes,” in Proceedings of the 16th International Conference on Artificial Intelligence and Statistics (AISTATS), 2013, pp. 207-215.
    [BibTeX]
    @inproceedings{damianou2012deep,
      author    = {Damianou, Andreas C. and Lawrence, Neil D.},
      title     = {Deep {Gaussian} Processes},
      booktitle = {Proceedings of the 16th International Conference on Artificial Intelligence and Statistics ({AISTATS})},
      pages     = {207--215},
      year      = {2013},
    }

  • A. Giusti, D. Cireşan, J. Masci, and L. M. Gambardella, “Fast Image Scanning with Deep Max-Pooling Convolutional Neural Networks,” 2013.
    [BibTeX]
    @misc{Giusti2013,
      author        = {Giusti, Alessandro and Cire{\c{s}}an, Dan and Masci, Jonathan and Gambardella, Luca Maria},
      title         = {Fast Image Scanning with Deep Max-Pooling Convolutional Neural Networks},
      year          = {2013},
      archivePrefix = {arXiv},
      eprint        = {1302.1700v1},
      arxivId       = {arXiv:1302.1700v1}
    }

  • A. Graves, A.-R. Mohamed, and G. Hinton, “Speech recognition with deep recurrent neural networks,” in 2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2013, pp. 6645-6649. doi:10.1109/ICASSP.2013.6638947
    [BibTeX]
    @inproceedings{Graves13,
      author    = {Graves, A. and Mohamed, A.-R. and Hinton, G.},
      title     = {Speech recognition with deep recurrent neural networks},
      booktitle = {2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
      pages     = {6645--6649},
      month     = may,
      year      = {2013},
      keywords  = {speech recognition;connectionist temporal classification;deep recurrent neural networks;end-to-end training methods;long short-term memory RNN architecture;sequential data;speech recognition;Acoustics;Noise;Recurrent neural networks;Speech recognition;Training;Vectors;deep neural networks;recurrent neural networks;speech recognition},
      doi       = {10.1109/ICASSP.2013.6638947},
      issn      = {1520-6149},
    }

  • C. Gulcehre and Y. Bengio, “Knowledge matters: Importance of prior information for optimization,” 2013.
    [BibTeX]
    @misc{gulcehre2013knowledge,
      author        = {Gulcehre, Caglar and Bengio, Yoshua},
      title         = {Knowledge matters: Importance of prior information for optimization},
      year          = {2013},
      archivePrefix = {arXiv},
      eprint        = {1301.4083},
      arxivId       = {arXiv:1301.4083}
    }

  • Y. Kim, H. Lee, and E. M. Provost, “Deep learning for robust feature generation in audiovisual emotion recognition,” in Acoustics, Speech and Signal Processing (ICASSP), 2013 IEEE International Conference on, 2013, pp. 3687-3691. doi:10.1109/ICASSP.2013.6638346
    [BibTeX]
    @inproceedings{Kim2013,
      author    = {Kim, Yelin and Lee, Honglak and Provost, E. M.},
      title     = {Deep learning for robust feature generation in audiovisual emotion recognition},
      booktitle = {2013 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
      pages     = {3687--3691},
      month     = may,
      year      = {2013},
      doi       = {10.1109/ICASSP.2013.6638346},
      issn      = {1520-6149},
    }

  • H. P. Martinez, Y. Bengio, and G. N. Yannakakis, “Learning Deep Physiological Models of Affect,” IEEE Computational Intelligence Magazine, vol. 8, iss. 2, pp. 20-33, 2013.
    [BibTeX]
    @article{martinez2013learning,
      author    = {Martinez, H{\'e}ctor Perez and Bengio, Yoshua and Yannakakis, Georgios N},
      title     = {Learning Deep Physiological Models of Affect},
      journal   = {IEEE Computational Intelligence Magazine},
      publisher = {IEEE},
      volume    = {8},
      number    = {2},
      pages     = {20--33},
      year      = {2013}
    }

  • J. Masci, A. Giusti, G. Fricout, D. Cireşan, and J. Schmidhuber, “A Fast Learning Algorithm for Image Segmentation with Max-Pooling Convolutional Networks,” in Proceedings of IEEE International Conference on Image Processing (ICIP), 2013.
    [BibTeX]
    @inproceedings{Masci2013,
      author        = {Masci, Jonathan and Giusti, Alessandro and Fricout, Gabriel and Cire{\c{s}}an, Dan and Schmidhuber, J{\"u}rgen},
      title         = {A Fast Learning Algorithm for Image Segmentation with Max-Pooling Convolutional Networks},
      booktitle     = {Proceedings of IEEE International Conference on Image Processing ({ICIP})},
      year          = {2013},
      archivePrefix = {arXiv},
      eprint        = {1302.1690v1},
      arxivId       = {arXiv:1302.1690v1}
    }

  • J. Schmidhuber, “My First Deep Learning System of 1991+ Deep Learning Timeline 1962-2013,” arXiv:1312.5548, 2013.
    [BibTeX]
    @misc{schmidhuber2013my,
      author        = {Schmidhuber, J{\"u}rgen},
      title         = {My First Deep Learning System of 1991+ Deep Learning Timeline 1962-2013},
      year          = {2013},
      archivePrefix = {arXiv},
      eprint        = {1312.5548},
      arxivId       = {arXiv:1312.5548}
    }

  • L. Wan, M. Zeiler, S. Zhang, Y. LeCun, and R. Fergus, “Regularization of Neural Networks using DropConnect,” in Proceedings of the 30th International Conference on Machine Learning (ICML-13), 2013, pp. 1058-1066.
    [BibTeX]
    @inproceedings{wan2013regularization,
      author    = {Wan, Li and Zeiler, Matthew and Zhang, Sixin and LeCun, Yann and Fergus, Rob},
      title     = {Regularization of Neural Networks using {DropConnect}},
      booktitle = {Proceedings of the 30th International Conference on Machine Learning ({ICML}-13)},
      pages     = {1058--1066},
      year      = {2013}
    }

  • L. Xie and X. He, “Picture tags and world knowledge: learning tag relations from visual semantic sources,” in ACM Multimedia, 2013, pp. 967-976.
    [BibTeX]
    @inproceedings{Xie2013,
      author    = {Xie, Lexing and He, Xuming},
      title     = {Picture tags and world knowledge: learning tag relations from visual semantic sources},
      booktitle = {ACM Multimedia},
      pages     = {967--976},
      year      = {2013},
      doi       = {10.1145/2502081.2502113},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }

  • K. Yao, G. Zweig, M. Hwang, Y. Shi, and D. Yu, “Recurrent neural networks for language understanding,” in Proceedings of Interspeech, 2013, pp. 104-108.
    [BibTeX]
    @inproceedings{yao2013recurrent,
      author    = {Yao, Kaisheng and Zweig, Geoffrey and Hwang, Mei-Yuh and Shi, Yangyang and Yu, Dong},
      title     = {Recurrent neural networks for language understanding},
      booktitle = {Proceedings of Interspeech},
      year      = {2013},
      pages     = {104--108}
    }

  • M. D. Zeiler and R. Fergus, “Stochastic Pooling for Regularization of Deep Convolutional Neural Networks,” in International Conference on Learning Representations (ICLR), 2013.
    [BibTeX]
    @inproceedings{Zeiler2013,
      title     = {Stochastic Pooling for Regularization of Deep Convolutional Neural Networks},
      author    = {Zeiler, Matthew D and Fergus, Rob},
      year      = {2013},
      booktitle = {International Conference on Learning Representations ({ICLR})}
    }

2012

  • Q. Le, M. Ranzato, R. Monga, M. Devin, K. Chen, G. Corrado, J. Dean, and A. Ng, “Building high-level features using large scale unsupervised learning,” in International Conference on Machine Learning (ICML), 2012.
    [BibTeX]
    @inproceedings{38115,
      author    = {Le, Quoc and Ranzato, Marc'Aurelio and Monga, Rajat and Devin, Matthieu and Chen, Kai and Corrado, Greg and Dean, Jeff and Ng, Andrew},
      title     = {Building high-level features using large scale unsupervised learning},
      booktitle = {International Conference on Machine Learning ({ICML})},
      year      = {2012}
    }

  • G. Alain, Y. Bengio, and S. Rifai, “Regularized Auto-Encoders Estimate Local Statistics,” Département d’Informatique et de Recherche Opérationnelle, Université de Montréal 2012.
    [BibTeX]
    @techreport{Alain2012,
      author        = {Alain, Guillaume and Bengio, Yoshua and Rifai, Salah},
      title         = {Regularized Auto-Encoders Estimate Local Statistics},
      institution   = {D{\'{e}}partement d'Informatique et de Recherche Op{\'{e}}rationnelle, Universit{\'{e}} de Montr{\'{e}}al},
      year          = {2012},
      archivePrefix = {arXiv},
      eprint        = {1211.4246},
      arxivId       = {arXiv:1211.4246}
    }

  • P. Baldi and P. Sadowski, “Deep Target Algorithms for Deep Learning,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2012.
    [BibTeX]
    @inproceedings{Baldi2012,
      title     = {Deep Target Algorithms for Deep Learning},
      author    = {Baldi, Pierre and Sadowski, Peter},
      year      = {2012},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning}
    }

  • D. Cireşan, A. Giusti, J. Schmidhuber, and others, “Deep neural networks segment neuronal membranes in electron microscopy images,” in Advances in Neural Information Processing Systems (NIPS), 2012, pp. 2852-2860.
    [BibTeX]
    @inproceedings{ciresan2012deep,
      author    = {Cire{\c{s}}an, Dan and Giusti, Alessandro and Schmidhuber, J{\"u}rgen and others},
      title     = {Deep neural networks segment neuronal membranes in electron microscopy images},
      booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
      pages     = {2852--2860},
      year      = {2012}
    }

  • D. Cireşan, U. Meier, and J. Schmidhuber, “Multi-column deep neural networks for image classification,” in 2012 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2012, pp. 3642-3649.
    [BibTeX]
    @inproceedings{ciresan2012multi,
      author    = {Cire{\c{s}}an, Dan and Meier, Ueli and Schmidhuber, J{\"u}rgen},
      title     = {Multi-column deep neural networks for image classification},
      booktitle = {2012 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
      year      = {2012},
      pages     = {3642--3649},
    }

  • D. C. Cireşan, U. Meier, and J. Schmidhuber, “Transfer learning for Latin and Chinese characters with Deep Neural Networks,” in 2012 International Joint Conference on Neural Networks (IJCNN), 2012.
    [BibTeX]
    @inproceedings{ciresan2012transfer,
      author    = {Cire{\c{s}}an, Dan Claudiu and Meier, Ueli and Schmidhuber, J{\"u}rgen},
      title     = {Transfer learning for {Latin} and {Chinese} characters with Deep Neural Networks},
      booktitle = {2012 International Joint Conference on Neural Networks ({IJCNN})},
      year      = {2012},
    }

  • G. E. Dahl, R. P. Adams, and H. Larochelle, “Training restricted Boltzmann machines on word observations,” arXiv:1202.5695, 2012.
    [BibTeX]
    @misc{dahl2012training,
      author        = {Dahl, George E and Adams, Ryan P and Larochelle, Hugo},
      title         = {Training restricted {Boltzmann} machines on word observations},
      year          = {2012},
      archivePrefix = {arXiv},
      eprint        = {1202.5695v1},
      arxivId       = {arXiv:1202.5695}
    }

  • S. Dieleman and B. Schrauwen, “Accelerating sparse restricted Boltzmann machine training using non-Gaussianity measures,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2012.
    [BibTeX]
    @inproceedings{dieleman2012accelerating,
      author    = {Dieleman, Sander and Schrauwen, Benjamin},
      title     = {Accelerating sparse restricted {Boltzmann} machine training using non-{Gaussianity} measures},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2012},
    }

  • A. Freytag, E. Rodner, P. Bodesheim, and J. Denzler, “Beyond Classification — Large-scale Gaussian Process Inference and Uncertainty Prediction,” in Proceedings of the 11th Asian Conference on Computer Vision (ACCV), 2012, pp. 2-6.
    [BibTeX]
    @inproceedings{Freytag2012,
      author    = {Freytag, Alexander and Rodner, Erik and Bodesheim, Paul and Denzler, Joachim},
      title     = {Beyond Classification -- Large-scale {Gaussian} Process Inference and Uncertainty Prediction},
      booktitle = {Proceedings of the 11th Asian Conference on Computer Vision ({ACCV})},
      pages     = {2--6},
      month     = nov,
      year      = {2012},
      location  = {Daejeon, Korea}
    }

  • I. J. Goodfellow, A. Courville, and Y. Bengio, “Joint Training of Partially-Directed Deep Boltzmann Machines,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2012.
    [BibTeX]
    @inproceedings{Goodfellow2012,
      title     = {Joint Training of Partially-Directed Deep {Boltzmann} Machines},
      author    = {Goodfellow, Ian J and Courville, Aaron and Bengio, Yoshua},
      year      = {2012},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning}
    }

  • C. Haeusler and A. Susemihl, “Temporal Autoencoding Restricted Boltzmann Machine,” arXiv:1210.8353v1, 2012.
    [BibTeX]
    @techreport{Haeusler2012,
      author        = {Haeusler, Chris and Susemihl, Alex},
      title         = {Temporal Autoencoding Restricted {Boltzmann} Machine},
      institution   = {Neuroinformatics and Theoretical Neuroscience Group and Department of Artificial Intelligence, Bernstein Center for Computational Neuroscience Berlin, Germany},
      year          = {2012},
      archivePrefix = {arXiv},
      eprint        = {1210.8353v1},
      arxivId       = {arXiv:1210.8353v1}
    }

  • G. Hinton, “A Practical Guide to Training Restricted Boltzmann Machines,” in Neural Networks: Tricks of the Trade, G. Montavon, G. Orr, and K. Müller, Eds., Springer Berlin Heidelberg, 2012, pp. 599-619. doi:10.1007/978-3-642-35289-8_32
    [BibTeX] [Download PDF]
    @incollection{hinton2012practical,
      author    = {Hinton, Geoffrey E.},
      title     = {A Practical Guide to Training Restricted {Boltzmann} Machines},
      editor    = {Montavon, Gr{\'e}goire and Orr, Genevi{\`e}ve B. and M{\"u}ller, Klaus-Robert},
      booktitle = {Neural Networks: Tricks of the Trade},
      series    = {Lecture Notes in Computer Science},
      publisher = {Springer Berlin Heidelberg},
      year      = {2012},
      pages     = {599--619},
      isbn      = {978-3-642-35288-1},
      doi       = {10.1007/978-3-642-35289-8_32},
      url       = {http://dx.doi.org/10.1007/978-3-642-35289-8_32}
    }

  • G. B. Huang and E. Learned-Miller, “Learning hierarchical representations for face verification with convolutional deep belief networks,” in 2012 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2012, pp. 2518-2525. doi:10.1109/CVPR.2012.6247968
    [BibTeX]
    @inproceedings{Huang2012,
      author    = {Huang, G. B. and Learned-Miller, E.},
      title     = {Learning hierarchical representations for face verification with convolutional deep belief networks},
      booktitle = {2012 IEEE Conference on Computer Vision and Pattern Recognition ({CVPR})},
      pages     = {2518--2525},
      year      = {2012},
      isbn      = {978-1-4673-1228-8},
      issn      = {1063-6919},
      doi       = {10.1109/CVPR.2012.6247968}
    }

  • S. J. Hwang, K. Grauman, and F. Sha, “Semantic Kernel Forests from Multiple Taxonomies,” in Advances in Neural Information Processing Systems, 2012, pp. 1727-1735.
    [BibTeX]
    @inproceedings{hwang2012semantic,
      author    = {Hwang, Sung J and Grauman, Kristen and Sha, Fei},
      title     = {Semantic Kernel Forests from Multiple Taxonomies},
      booktitle = {Advances in Neural Information Processing Systems},
      year      = {2012},
      pages     = {1727--1735}
    }

  • Y. Jia, J. Abbott, J. Austerweil, T. Griffiths, and T. Darrell, “Visually-Grounded Bayesian Word Learning,” EECS Department, University of California, Berkeley, UCB/EECS-2012-202, 2012.
    [BibTeX] [Abstract]

    Learning the meaning of a novel noun from a few labeled objects is one of the simplest aspects of learning a language, but approximating human performance on this task is still a significant challenge for current machine learning systems. Current methods typically fail to find the appropriate level of generalization in a concept hierarchy for a given visual stimulus. Recent work in cognitive science on Bayesian models of word learning partially addresses this challenge, but it assumes that the labels of objects are given (hence no object recognition) and it has only been evaluated in small domains. We present a system for learning nouns directly from images, using probabilistic predictions generated by visual classifiers as the input to Bayesian word learning, and compare this system to human performance in an automated, large-scale experiment. The system captures a significant proportion of the variance in human responses. Combining the uncertain outputs of the visual classifiers with the ability to identify an appropriate level of abstraction that comes from Bayesian word learning allows the system to outperform alternatives that either cannot deal with visual stimuli or use a more conventional computer vision approach.

    @techreport{Jia2012,
      author      = {Jia, Yangqing and Abbott, Joshua and Austerweil, Joseph and Griffiths, Thomas and Darrell, Trevor},
      title       = {Visually-Grounded {Bayesian} Word Learning},
      institution = {EECS Department, University of California, Berkeley},
      number      = {UCB/EECS-2012-202},
      month       = oct,
      year        = {2012},
      abstract    = {Learning the meaning of a novel noun from a few labeled objects is one of the simplest aspects of learning a language, but approximating human performance on this task is still a significant challenge for current machine learning systems. Current methods typically fail to find the appropriate level of generalization in a concept hierarchy for a given visual stimulus. Recent work in cognitive science on Bayesian models of word learning partially addresses this challenge, but it assumes that the labels of objects are given (hence no object recognition) and it has only been evaluated in small domains. We present a system for learning nouns directly from images, using probabilistic predictions generated by visual classifiers as the input to Bayesian word learning, and compare this system to human performance in an automated, large-scale experiment. The system captures a significant proportion of the variance in human responses. Combining the uncertain outputs of the visual classifiers with the ability to identify an appropriate level of abstraction that comes from Bayesian word learning allows the system to outperform alternatives that either cannot deal with visual stimuli or use a more conventional computer vision approach.}
    }

  • U. Koster, J. Sohl-Dickstein, and B. Olshausen, “Modeling Laminar Recordings from Visual Cortex with Semi-Restricted Boltzmann Machines,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2012.
    [BibTeX]
    @inproceedings{Koster2012,
      author    = {Koster, Urs and Sohl-Dickstein, Jascha and Olshausen, Bruno},
      title     = {Modeling Laminar Recordings from Visual Cortex with Semi-Restricted {Boltzmann} Machines},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2012}
    }

  • A. Krizhevsky, I. Sutskever, and G. Hinton, “ImageNet Classification with Deep Convolutional Neural Networks,” in Advances in Neural Information Processing Systems (NIPS), 2012, pp. 1106-1114.
    [BibTeX]
    @inproceedings{krizhevsky2012imagenet,
      author    = {Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey},
      title     = {{ImageNet} Classification with Deep Convolutional Neural Networks},
      booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
      pages     = {1106--1114},
      year      = {2012}
    }

  • M. Längkvist and A. Loutfi, “Not all signals are created equal: Dynamic Objective Auto-Encoder for Multivariate Data,” NIPS Neural Information Processing Systems, 2012.
    [BibTeX]
    @inproceedings{Langkvist2012,
      author    = {L{\"a}ngkvist, Martin and Loutfi, Amy},
      title     = {Not all signals are created equal: Dynamic Objective Auto-Encoder for Multivariate Data},
      booktitle = {{NIPS} Neural Information Processing Systems},
      year      = {2012}
    }

  • W. Lee and M. S. Lewicki, “Adaptive representations of scenes based on ICA mixture model,” in NIPS Workshop — Big Data Meets Computer Vision: First International Workshop on Large Scale Visual Recognition and Retrieval, 2012.
    [BibTeX]
    @inproceedings{Lee2012,
      author    = {Lee, Wooyoung and Lewicki, Michael S},
      title     = {Adaptive representations of scenes based on {ICA} mixture model},
      booktitle = {{NIPS} Workshop -- Big Data Meets Computer Vision: First International Workshop on Large Scale Visual Recognition and Retrieval},
      year      = {2012}
    }

  • A. R. Mahmood and R. S. Sutton, “Online Representation Search and Its Interactions with Unsupervised Learning,” in Advances in Neural Information Processing Systems (NIPS), 2012.
    [BibTeX]
    @inproceedings{Mahmood2012,
      title     = {Online Representation Search and Its Interactions with Unsupervised Learning},
      author    = {Mahmood, Ashique Rupam and Sutton, Richard S},
      year      = {2012},
      booktitle = {Advances in Neural Information Processing Systems ({NIPS})}
    }

  • J. Masci, U. Meier, D. Cireşan, J. Schmidhuber, and G. Fricout, “Steel defect classification with max-pooling convolutional neural networks,” in The 2012 International Joint Conference on Neural Networks (IJCNN), 2012.
    [BibTeX]
    @inproceedings{masci2012steel,
      author    = {Masci, Jonathan and Meier, Ueli and Cire{\c{s}}an, Dan and Schmidhuber, J{\"u}rgen and Fricout, Gabriel},
      title     = {Steel defect classification with max-pooling convolutional neural networks},
      year      = {2012},
      booktitle = {The 2012 International Joint Conference on Neural Networks (IJCNN)},
    }

  • G. Mesnil, Y. Dauphin, X. Glorot, S. Rifai, Y. Bengio, I. J. Goodfellow, E. Lavoie, X. Muller, G. Desjardins, D. Warde-Farley, and others, “Unsupervised and Transfer Learning Challenge: a Deep Learning Approach,” Journal of Machine Learning Research-Proceedings Track, vol. 27, pp. 97-110, 2012.
    [BibTeX]
    @article{mesnil2012unsupervised,
      author  = {Mesnil, Gr{\'e}goire and Dauphin, Yann and Glorot, Xavier and Rifai, Salah and Bengio, Yoshua and Goodfellow, Ian J and Lavoie, Erick and Muller, Xavier and Desjardins, Guillaume and Warde-Farley, David and others},
      title   = {Unsupervised and Transfer Learning Challenge: a Deep Learning Approach},
      journal = {Journal of Machine Learning Research-Proceedings Track},
      volume  = {27},
      pages   = {97--110},
      year    = {2012}
    }

  • G. Montufar and J. Morton, “Kernels and Submodels of Deep Belief Networks,” arXiv:1211.0932v1, 2012.
    [BibTeX]
    @misc{Montufar2012,
      author        = {Montufar, Guido and Morton, Jason},
      title         = {Kernels and Submodels of Deep Belief Networks},
      year          = {2012},
      archivePrefix = {arXiv},
      eprint        = {1211.0932v1},
      arxivId       = {arXiv:1211.0932v1}
    }

  • H. Nakayama, “Aggregating Descriptors with Local Gaussian Metrics,” in NIPS Workshop — Big Data Meets Computer Vision: First International Workshop on Large Scale Visual Recognition and Retrieval, 2012.
    [BibTeX]
    @inproceedings{Nakayama2012,
      author    = {Nakayama, Hideki},
      title     = {Aggregating Descriptors with Local {Gaussian} Metrics},
      booktitle = {{NIPS} Workshop -- Big Data Meets Computer Vision: First International Workshop on Large Scale Visual Recognition and Retrieval},
      year      = {2012}
    }

  • B. Olshausen, “Can deep learning provide deep insights about visual representation?,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2012.
    [BibTeX]
    @inproceedings{Olshausen,
      title     = {Can deep learning provide deep insights about visual representation?},
      author    = {Olshausen, Bruno},
      year      = {2012},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
    }

  • C. J. Pal and K. Hasan, “Creating a Big Data Resource from the Faces of Wikipedia,” in NIPS Workshop — Big Data Meets Computer Vision: First International Workshop on Large Scale Visual Recognition and Retrieval, 2012.
    [BibTeX]
    @inproceedings{Pal2012,
      author    = {Pal, Christopher J and Hasan, Kamrul},
      title     = {Creating a Big Data Resource from the Faces of {Wikipedia}},
      booktitle = {{NIPS} Workshop -- Big Data Meets Computer Vision: First International Workshop on Large Scale Visual Recognition and Retrieval},
      year      = {2012},
      workshop  = {BigVision 2012}
    }

  • R. Pascanu, T. Mikolov, and Y. Bengio, “On the difficulty of training recurrent neural networks,” in Proceedings of The 30th International Conference on Machine Learning, 2013, pp. 1310-1318.
    [BibTeX]
    @inproceedings{Pascanu2012,
      author        = {Pascanu, Razvan and Mikolov, Tomas and Bengio, Yoshua},
      title         = {On the difficulty of training recurrent neural networks},
      booktitle     = {Proceedings of the 30th International Conference on Machine Learning ({ICML}-13)},
      pages         = {1310--1318},
      year          = {2013},
      archivePrefix = {arXiv},
      eprint        = {1211.5063v2},
      arxivId       = {arXiv:1211.5063v2},
    }

  • R. Pascanu, T. Mikolov, and Y. Bengio, “Understanding the exploding gradient problem,” Computing Research Repository (CoRR), vol. abs/1211.5063, 2012.
    [BibTeX]
    @article{Pascanu2012a,
      title     = {Understanding the exploding gradient problem},
      author    = {Pascanu, Razvan and Mikolov, Tomas and Bengio, Yoshua},
      year      = {2012},
      journal   = {Computing Research Repository {(CoRR)}},
      volume    = {abs/1211.5063},
      bibsource = {DBLP, http://dblp.uni-trier.de}
    }

  • T. Raiko, H. Valpola, and Y. LeCun, “Deep learning made easier by linear transformations in perceptrons,” in International Conference on Artificial Intelligence and Statistics (AISTATS), 2012, pp. 924-932.
    [BibTeX]
    @inproceedings{raiko2012deep,
      author    = {Raiko, Tapani and Valpola, Harri and LeCun, Yann},
      title     = {Deep learning made easier by linear transformations in perceptrons},
      booktitle = {International Conference on Artificial Intelligence and Statistics ({AISTATS})},
      year      = {2012},
      pages     = {924--932}
    }

  • R. Salakhutdinov and G. Hinton, “An efficient learning procedure for deep Boltzmann machines,” Neural Computation, vol. 24, iss. 8, pp. 1967-2006, 2012.
    [BibTeX]
    @article{salakhutdinov2012efficient,
      author    = {Salakhutdinov, Ruslan and Hinton, Geoffrey},
      title     = {An efficient learning procedure for deep {Boltzmann} machines},
      journal   = {Neural Computation},
      publisher = {MIT Press},
      year      = {2012},
      volume    = {24},
      number    = {8},
      pages     = {1967--2006}
    }

  • H. Schulz and S. Behnke, “Deep Learning: Layer-wise Learning of Feature Hierarchies,” Künstliche Intelligenz, vol. 26, iss. 4, pp. 357-363, 2012.
    [BibTeX]
    @article{schulz12ki,
      author     = {Schulz, Hannes and Behnke, Sven},
      title      = {Deep Learning: Layer-wise Learning of Feature Hierarchies},
      journal    = {K{\"u}nstliche Intelligenz},
      issuetitle = {Neural Learning Paradigms},
      volume     = {26},
      number     = {4},
      pages      = {357--363},
      year       = {2012},
      keywords   = {deeplearning},
    }

  • L. Y. Shao, “Linear-Nonlinear-Poisson Neurons Can Do Inference On Deep Boltzmann Machines,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2012.
    [BibTeX]
    @inproceedings{Shao2012,
      author    = {Shao, Louis Yuanlong},
      title     = {Linear-Nonlinear-{Poisson} Neurons Can Do Inference On Deep {Boltzmann} Machines},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2012},
    }

  • R. Socher, B. Huval, B. Bhat, C. D. Manning, and A. Ng, “Convolutional-recursive deep learning for 3D object classification,” in Advances in Neural Information Processing Systems (NIPS), 2012, pp. 665-673.
    [BibTeX]
    @inproceedings{socher2012convolutional,
      author    = {Socher, Richard and Huval, Brody and Bhat, Bharath and Manning, Christopher D and Ng, Andrew},
      title     = {Convolutional-recursive deep learning for {3D} object classification},
      booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
      pages     = {665--673},
      year      = {2012}
    }

  • R. Socher, B. Huval, C. D. Manning, and A. Y. Ng, “Semantic compositionality through recursive matrix-vector spaces,” in Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning, 2012, pp. 1201-1211.
    [BibTeX]
    @inproceedings{socher2012semantic,
      author       = {Socher, Richard and Huval, Brody and Manning, Christopher D and Ng, Andrew Y},
      title        = {Semantic compositionality through recursive matrix-vector spaces},
      booktitle    = {Proceedings of the 2012 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning},
      pages        = {1201--1211},
      organization = {Association for Computational Linguistics},
      year         = {2012},
    }

  • Y. Tang, R. Salakhutdinov, and G. Hinton, “Robust Boltzmann Machines for Recognition and Denoising,” IEEE Conference on Computer Vision and Pattern Recognition (CVPR), pp. 2264-2271, 2012. doi:10.1109/CVPR.2012.6247936
    [BibTeX]
    @inproceedings{Tang2012,
      author    = {Tang, Yichuan and Salakhutdinov, Ruslan and Hinton, Geoffrey},
      title     = {Robust {Boltzmann} Machines for Recognition and Denoising},
      booktitle = {IEEE Conference on Computer Vision and Pattern Recognition ({CVPR})},
      pages     = {2264--2271},
      publisher = {IEEE},
      month     = jun,
      year      = {2012},
      doi       = {10.1109/CVPR.2012.6247936},
      isbn      = {978-1-4673-1228-8},
    }

  • M. Villegas, R. Paredes, and D. Vera, “A $k$-NN Approach for Scalable Image Annotation Using General Web Data,” in NIPS Workshop — Big Data Meets Computer Vision: First International Workshop on Large Scale Visual Recognition and Retrieval, 2012.
    [BibTeX]
    @inproceedings{Villegas2009,
      author    = {Villegas, Mauricio and Paredes, Roberto},
      title     = {A {$k$-NN} Approach for Scalable Image Annotation Using General {Web} Data},
      booktitle = {{NIPS} Workshop -- Big Data Meets Computer Vision: First International Workshop on Large Scale Visual Recognition and Retrieval},
      year      = {2012},
    }

  • B. A. White, A. E. Miller, and L. S. Davis, “Classifier-as-a-Service: Online Query of Cascades and Operating Points,” in NIPS Workshop — Big Data Meets Computer Vision: First International Workshop on Large Scale Visual Recognition and Retrieval, 2012.
    [BibTeX]
    @inproceedings{White2012,
      author    = {White, Brandyn A and Miller, Andrew E and Davis, Larry S},
      title     = {Classifier-as-a-Service: Online Query of Cascades and Operating Points},
      booktitle = {{NIPS} Workshop -- Big Data Meets Computer Vision: First International Workshop on Large Scale Visual Recognition and Retrieval},
      year      = {2012}
    }

  • J. Xie, L. Xu, and E. Chen, “Image denoising and inpainting with deep neural networks,” in Advances in Neural Information Processing Systems, 2012, pp. 350-358.
    [BibTeX]
    @inproceedings{xie2012image,
      author    = {Xie, Junyuan and Xu, Linli and Chen, Enhong},
      title     = {Image denoising and inpainting with deep neural networks},
      booktitle = {Advances in Neural Information Processing Systems},
      pages     = {350--358},
      year      = {2012},
    }

  • C. Xiong and J. J. Corso, “Randomly Multi-view Clustering for Hashing,” NIPS Workshop — Big Data Meets Computer Vision: First International Workshop on Large Scale Visual Recognition and Retrieval, 2012.
    [BibTeX]
    @inproceedings{Xiong2012,
      author    = {Xiong, Caiming and Corso, Jason J},
      title     = {Randomly Multi-view Clustering for Hashing},
      booktitle = {{NIPS} Workshop -- Big Data Meets Computer Vision: First International Workshop on Large Scale Visual Recognition and Retrieval},
      year      = {2012},
    }

  • M. D. Zeiler and R. Fergus, “Differentiable Pooling for Hierarchical Feature Learning,” , 2012.
    [BibTeX]
    @misc{zeiler2012differentiable,
      author        = {Zeiler, Matthew D and Fergus, Rob},
      title         = {Differentiable Pooling for Hierarchical Feature Learning},
      archivePrefix = {arXiv},
      eprint        = {1207.0151},
      arXivID       = {arXiv:1207.0151},
      year          = {2012},
    }

  • W. Zou, A. Ng, S. Zhu, and K. Yu, “Deep learning of invariant features via simulated fixations in video,” in Advances in Neural Information Processing Systems 25, 2012, pp. 3212-3220.
    [BibTeX]
    @inproceedings{zou2012deep,
      author    = {Zou, Will and Ng, Andrew and Zhu, Shenghuo and Yu, Kai},
      title     = {Deep learning of invariant features via simulated fixations in video},
      booktitle = {Advances in Neural Information Processing Systems 25},
      pages     = {3212--3220},
      year      = {2012},
    }

2011

  • D. Benbouzid, R. Busa-Fekete, and B. Kégl, “MDDAG: learning deep decision DAGs in a Markov decision process setup,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2011.
    [BibTeX]
    @inproceedings{benbouzid2011mddag,
      author    = {Benbouzid, Djalel and Busa-Fekete, R{\'o}bert and K{\'e}gl, Bal{\'a}zs},
      title     = {{MDDAG}: learning deep decision {DAGs} in a {Markov} decision process setup},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011},
    }

  • M. Blum, J. T. Springenberg, J. Wulfing, and M. Riedmiller, “On the applicability of unsupervised feature learning for object recognition in RGB-D data,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2011.
    [BibTeX]
    @inproceedings{blum2011applicability,
      author    = {Blum, Manuel and Springenberg, Jost Tobias and W{\"u}lfing, Jan and Riedmiller, Martin},
      title     = {On the applicability of unsupervised feature learning for object recognition in {RGB-D} data},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011},
    }

  • O. Chapelle and D. Erhan, “Improved Preconditioner for Hessian Free Optimization,” in NIPS Workshop on Deep Learning and Learning Feature Hierarchies, 2011.
    [BibTeX]
    @inproceedings{Chapelle2011,
      author    = {Chapelle, Olivier and Erhan, Dumitru},
      title     = {Improved Preconditioner for {Hessian} Free Optimization},
      booktitle = {{NIPS} Workshop on Deep Learning and Learning Feature Hierarchies},
      year      = {2011},
    }

  • J. Chien and Y. Chang, “Unsupervised Structural Learning of Word Topics and Sentence Topics,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2011.
    [BibTeX]
    @inproceedings{Chien2011,
      author    = {Chien, Jen-Tzung and Chang, Ying-Lan},
      title     = {Unsupervised Structural Learning of Word Topics and Sentence Topics},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011},
    }

  • K. Cho, T. Raiko, and A. Ilin, “Gaussian-Bernoulli Deep Boltzmann Machine,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2011.
    [BibTeX]
    @inproceedings{cho2011gaussian,
      author    = {Cho, Kyunghyun and Raiko, Tapani and Ilin, Alexander},
      title     = {Gaussian-{Bernoulli} Deep {Boltzmann} Machine},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011},
    }

  • K. Cho, T. Raiko, A. Ilin, and J. Karhunen, “A Two-stage Pretraining Algorithm for Deep Boltzmann Machines,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2011.
    [BibTeX]
    @inproceedings{Cho2012,
      author    = {Cho, Kyunghyun and Raiko, Tapani and Ilin, Alexander and Karhunen, Juha},
      title     = {A Two-stage Pretraining Algorithm for Deep {Boltzmann} Machines},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011},
    }

  • D. C. Cireşan, U. Meier, J. Masci, L. M. Gambardella, and J. Schmidhuber, “Flexible, high performance convolutional neural networks for image classification,” in Proceedings of the $22^{nd}$ International Joint Conference on Artificial Intelligence-Volume 2, 2011, pp. 1237-1242.
    [BibTeX]
    @inproceedings{cirecsan2011flexible,
      author       = {Cire{\c{s}}an, Dan C and Meier, Ueli and Masci, Jonathan and Gambardella, Luca M and Schmidhuber, J{\"u}rgen},
      title        = {Flexible, high performance convolutional neural networks for image classification},
      booktitle    = {Proceedings of the $22^{nd}$ International Joint Conference on Artificial Intelligence-Volume 2},
      pages        = {1237--1242},
      organization = {AAAI Press},
      year         = {2011},
    }

  • D. Cireşan, U. Meier, J. Masci, and J. Schmidhuber, “A committee of neural networks for traffic sign classification,” in Neural Networks (IJCNN), The 2011 International Joint Conference on, 2011, pp. 1918-1921.
    [BibTeX]
    @inproceedings{ciresan2011committee,
      author       = {Cire{\c{s}}an, Dan and Meier, Ueli and Masci, Jonathan and Schmidhuber, J{\"u}rgen},
      title        = {A committee of neural networks for traffic sign classification},
      booktitle    = {Neural Networks (IJCNN), The 2011 International Joint Conference on},
      pages        = {1918--1921},
      organization = {IEEE},
      year         = {2011},
    }

  • D. C. Cireşan, U. Meier, L. M. Gambardella, and J. Schmidhuber, “Convolutional neural network committees for handwritten character classification,” in 2011 International Conference on Document Analysis and Recognition (ICDAR), 2011, pp. 1135-1139.
    [BibTeX]
    @inproceedings{ciresan2011convolutional,
      author       = {Cire{\c{s}}an, Dan Claudiu and Meier, Ueli and Gambardella, Luca Maria and Schmidhuber, J{\"u}rgen},
      title        = {Convolutional neural network committees for handwritten character classification},
      booktitle    = {2011 International Conference on Document Analysis and Recognition (ICDAR)},
      pages        = {1135--1139},
      organization = {IEEE},
      year         = {2011},
    }

  • D. Cireşan, U. Meier, L. M. Gambardella, and J. Schmidhuber, “Handwritten Digit Recognition with a Committee of Deep Neural Nets on GPUs,” 2011.
    [BibTeX]
    @techreport{Ciresan2011d,
      author        = {Cire{\c{s}}an, Dan and Meier, Ueli and Gambardella, Luca Maria and Schmidhuber, J{\"u}rgen},
      title         = {Handwritten Digit Recognition with a Committee of Deep Neural Nets on {GPUs}},
      institution   = {IDSIA / USI-SUPSI},
      number        = {IDSIA-03-11},
      archivePrefix = {arXiv},
      eprint        = {1103.4487v1},
      arxivId       = {arXiv:1103.4487v1},
      year          = {2011},
    }

  • M. Längkvist and A. Loutfi, “Unsupervised feature learning for electronic nose data applied to Bacteria Identification in Blood,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2011.
    [BibTeX]
    @inproceedings{langkvist2011unsupervised,
      author    = {L{\"a}ngkvist, Martin and Loutfi, Amy},
      title     = {Unsupervised feature learning for electronic nose data applied to Bacteria Identification in Blood},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011},
    }

  • Q. V. Le, A. Coates, B. Prochnow, and A. Y. Ng, “On Optimization Methods for Deep Learning,” in Proceedings of The 28th International Conference on Machine Learning (ICML), 2011, pp. 265-272.
    [BibTeX]
    @inproceedings{Le2011,
      author    = {Le, Quoc V and Ngiam, Jiquan and Coates, Adam and Lahiri, Abhik and Prochnow, Bobby and Ng, Andrew Y},
      title     = {On Optimization Methods for Deep Learning},
      booktitle = {Proceedings of The 28th International Conference on Machine Learning ({ICML})},
      pages     = {265--272},
      year      = {2011},
    }

  • H. Lee, R. Grosse, R. Ranganath, and A. Y. Ng, “Unsupervised learning of hierarchical representations with convolutional deep belief networks,” Communications of the ACM, vol. 54, iss. 10, pp. 95-103, 2011.
    [BibTeX]
    @article{lee2011unsupervised,
      author    = {Lee, Honglak and Grosse, Roger and Ranganath, Rajesh and Ng, Andrew Y},
      title     = {Unsupervised learning of hierarchical representations with convolutional deep belief networks},
      journal   = {Communications of the ACM},
      volume    = {54},
      number    = {10},
      pages     = {95--103},
      publisher = {ACM},
      year      = {2011},
    }

  • U. Meier, D. C. Cireşan, L. M. Gambardella, and J. Schmidhuber, “Better digit recognition with a committee of simple neural nets,” in International Conference on Document Analysis and Recognition (ICDAR), 2011, pp. 1250-1254.
    [BibTeX]
    @inproceedings{meier2011better,
      author       = {Meier, Ueli and Cire{\c{s}}an, Dan Claudiu and Gambardella, Luca Maria and Schmidhuber, J{\"u}rgen},
      title        = {Better digit recognition with a committee of simple neural nets},
      booktitle    = {International Conference on Document Analysis and Recognition (ICDAR)},
      pages        = {1250--1254},
      organization = {IEEE},
      year         = {2011},
    }

  • R. Memisevic, “On spatio-temporal sparse coding: Analysis and an algorithm,” NIPS Workshop in Deep Learning and Unsupervised Feature Learning, 2011.
    [BibTeX]
    @inproceedings{Memisevic2011,
      author    = {Memisevic, Roland},
      title     = {On spatio-temporal sparse coding: Analysis and an algorithm},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011},
    }

  • A-R. Mohamed, T. N. Sainath, G. Dahl, B. Ramabhadran, G. E. Hinton, and M. A. Picheny, “Deep belief networks using discriminative features for phone recognition,” in IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2011), 2011, pp. 5060-5063.
    [BibTeX]
    @inproceedings{mohamed2011deep,
      author       = {Mohamed, Abdel-Rahman and Sainath, Tara N and Dahl, George and Ramabhadran, Bhuvana and Hinton, Geoffrey E and Picheny, Michael A},
      title        = {Deep belief networks using discriminative features for phone recognition},
      booktitle    = {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP 2011)},
      pages        = {5060--5063},
      organization = {IEEE},
      year         = {2011},
    }

  • G. Montavon, M. Braun, and K. -R. Müller, “Importance of Cross-Layer Cooperation for Learning Deep Feature Hierarchies,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2011.
    [BibTeX]
    @inproceedings{Montavon2011,
      author    = {Montavon, G. and Braun, M. and M{\"u}ller, K.-R.},
      title     = {Importance of Cross-Layer Cooperation for Learning Deep Feature Hierarchies},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011}
    }

  • J. Nagi, F. Ducatelle, G. D. A. Caro, D. Cireşan, U. Meier, A. Giusti, and L. M. Gambardella, “Max-Pooling Convolutional Neural Networks for Vision-based Hand Gesture Recognition,” , pp. 342-347, 2011.
    [BibTeX]
    @inproceedings{Nagi2011,
      author    = {Nagi, Jawad and Ducatelle, Frederick and Di Caro, Gianni A and Cire{\c{s}}an, Dan and Meier, Ueli and Giusti, Alessandro and Gambardella, Luca Maria},
      title     = {Max-Pooling Convolutional Neural Networks for Vision-based Hand Gesture Recognition},
      booktitle = {IEEE International Conference on Signal and Image Processing Applications ({ICSIPA})},
      pages     = {342--347},
      year      = {2011},
      isbn      = {9781457702426},
    }

  • Y. Netzer, T. Wang, A. Coates, A. Bissacco, B. Wu, and A. Y. Ng, “Reading Digits in Natural Images with Unsupervised Feature Learning,” , 2011.
    [BibTeX]
    @inproceedings{Netzer2011,
      author    = {Netzer, Yuval and Wang, Tao and Coates, Adam and Bissacco, Alessandro and Wu, Bo and Ng, Andrew Y},
      title     = {Reading Digits in Natural Images with Unsupervised Feature Learning},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011},
    }

  • J. Ngiam, A. Khosla, M. Kim, J. Nam, H. Lee, and A. Y. Ng, “Multimodal Deep Learning,” in Proceedings of The 28th International Conference on Machine Learning (ICML), 2011, pp. 689-696.
    [BibTeX]
    @inproceedings{Ngiam2011,
      author    = {Ngiam, Jiquan and Khosla, Aditya and Kim, Mingyu and Nam, Juhan and Lee, Honglak and Ng, Andrew Y},
      title     = {Multimodal Deep Learning},
      booktitle = {Proceedings of The 28th International Conference on Machine Learning ({ICML})},
      pages     = {689--696},
      year      = {2011},
    }

  • J. Schmidhuber, D. Cireşan, U. Meier, J. Masci, and A. Graves, “On fast deep nets for AGI vision,” in Artificial General Intelligence, Springer, 2011, pp. 243-246.
    [BibTeX]
    @incollection{schmidhuber2011fast,
      author    = {Schmidhuber, J{\"u}rgen and Cire{\c{s}}an, Dan and Meier, Ueli and Masci, Jonathan and Graves, Alex},
      title     = {On fast deep nets for {AGI} vision},
      booktitle = {Artificial General Intelligence},
      pages     = {243--246},
      publisher = {Springer},
      year      = {2011},
    }

  • H. Schulz and S. Behnke, “Object-class segmentation using deep convolutional neural networks,” in Proceedings of the DAGM Workshop on New Challenges in Neural Computation, 2011, pp. 58-61.
    [BibTeX]
    @inproceedings{schulz2011object,
      author    = {Schulz, Hannes and Behnke, Sven},
      title     = {Object-class segmentation using deep convolutional neural networks},
      booktitle = {Proceedings of the DAGM Workshop on New Challenges in Neural Computation},
      pages     = {58--61},
      year      = {2011},
    }

  • R. Socher, C. C. Lin, A. Ng, and C. Manning, “Parsing natural scenes and natural language with recursive neural networks,” in Proceedings of the 28th International Conference on Machine Learning (ICML), 2011, pp. 129-136.
    [BibTeX]
    @inproceedings{socher2011parsing,
      author    = {Socher, Richard and Lin, Cliff C and Ng, Andrew and Manning, Chris},
      title     = {Parsing natural scenes and natural language with recursive neural networks},
      booktitle = {Proceedings of the 28th International Conference on Machine Learning ({ICML})},
      pages     = {129--136},
      year      = {2011},
    }

  • B. Uria, S. Renals, and K. Richmond, “A Deep Neural Network for Acoustic-Articulatory Speech Inversion,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2011.
    [BibTeX]
    @inproceedings{Uria2011,
      author    = {Uria, Benigno and Renals, Steve and Richmond, Korin},
      title     = {A Deep Neural Network for Acoustic-Articulatory Speech Inversion},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011}
    }

  • V. Vanhoucke, A. Senior, and M. Z. Mao, “Improving the speed of neural networks on CPUs,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2011.
    [BibTeX]
    @inproceedings{vanhoucke2011improving,
      author    = {Vanhoucke, Vincent and Senior, Andrew and Mao, Mark Z},
      title     = {Improving the speed of neural networks on {CPUs}},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011},
    }

  • Z. Wang and N. de Freitas, “Predictive adaptation of hybrid Monte Carlo with Bayesian parametric bandits,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2011.
    [BibTeX]
    @inproceedings{wang2011predictive,
      author    = {Wang, Ziyu and de Freitas, Nando},
      title     = {Predictive adaptation of hybrid {Monte} {Carlo} with {Bayesian} parametric bandits},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011},
    }

  • M. D. Zeiler, G. W. Taylor, and R. Fergus, “Adaptive Deconvolutional Networks for Mid and High Level Feature Learning,” in 2011 IEEE International Conference on Computer Vision (ICCV), 2011, pp. 2018-2025.
    [BibTeX]
    @inproceedings{zeiler2011adaptive,
      author       = {Zeiler, Matthew D and Taylor, Graham W and Fergus, Rob},
      title        = {Adaptive Deconvolutional Networks for Mid and High Level Feature Learning},
      booktitle    = {2011 IEEE International Conference on Computer Vision ({ICCV})},
      pages        = {2018--2025},
      organization = {IEEE},
      year         = {2011},
    }

  • W. Y. Zou, A. Y. Ng, and K. Yu, “Unsupervised learning of visual invariance with temporal coherence,” in NIPS Workshop on Deep Learning and Unsupervised Feature Learning, 2011.
    [BibTeX]
    @inproceedings{zou2011unsupervised,
      author    = {Zou, Will Y and Ng, Andrew Y and Yu, Kai},
      title     = {Unsupervised learning of visual invariance with temporal coherence},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2011},
    }

2010

  • Y. Boureau, J. Ponce, and Y. LeCun, “A Theoretical Analysis of Feature Pooling in Visual Recognition,” in Proceedings of the 27th International Conference on Machine Learning (2010), 2010, pp. 111-118.
    [BibTeX]
    @inproceedings{Boureau2010,
      author    = {Boureau, Y-Lan and Ponce, Jean and LeCun, Yann},
      title     = {A Theoretical Analysis of Feature Pooling in Visual Recognition},
      booktitle = {Proceedings of the 27th International Conference on Machine Learning ({ICML})},
      pages     = {111--118},
      year      = {2010},
    }

  • Y. Boureau, F. Bach, Y. LeCun, and J. Ponce, “Learning mid-level features for recognition,” in Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on, 2010, pp. 2559-2566.
    [BibTeX]
    @inproceedings{Boureau2010a,
      author    = {Boureau, Y-Lan and Bach, Francis and LeCun, Yann and Ponce, Jean},
      title     = {Learning mid-level features for recognition},
      booktitle = {Computer Vision and Pattern Recognition (CVPR), 2010 IEEE Conference on},
      pages     = {2559--2566},
      year      = {2010},
    }

  • D. Cireşan, U. Meier, L. M. Gambardella, and J. Schmidhuber, “Deep Big Simple Neural Nets Excel on Handwritten Digit Recognition,” Neural Computation, vol. 22, iss. 12, pp. 3207-3220, 2010.
    [BibTeX]
    @article{Ciresan2010,
      author  = {Cire{\c{s}}an, Dan and Meier, Ueli and Gambardella, Luca Maria and Schmidhuber, J{\"u}rgen},
      title   = {Deep Big Simple Neural Nets Excel on Handwritten Digit Recognition},
      journal = {Neural Computation},
      volume  = {22},
      number  = {12},
      pages   = {3207--3220},
      year    = {2010},
    }

  • G. E. Dahl, M. Ranzato, A. Mohamed, and G. E. Hinton, “Phone Recognition with the Mean-Covariance Restricted Boltzmann Machine,” in Advances in Neural Information Processing Systems 23, 2010, pp. 469-477.
    [BibTeX]
    @inproceedings{dahl2010phone,
      author    = {Dahl, George E. and Ranzato, Marc'Aurelio and Mohamed, Abdel-Rahman and Hinton, Geoffrey E.},
      title     = {Phone Recognition with the Mean-Covariance Restricted {Boltzmann} Machine},
      booktitle = {Advances in Neural Information Processing Systems 23},
      editor    = {Lafferty, J. and Williams, C. K. I. and Shawe-Taylor, J. and Zemel, R. S. and Culotta, A.},
      pages     = {469--477},
      year      = {2010},
    }

  • G. Desjardins, A. C. Courville, Y. Bengio, P. Vincent, and O. Delalleau, “Tempered Markov chain Monte Carlo for training of restricted Boltzmann machines,” in International Conference on Artificial Intelligence and Statistics (AISTATS), 2010, pp. 145-152.
    [BibTeX]
    @inproceedings{desjardins2010tempered,
      author    = {Desjardins, Guillaume and Courville, Aaron C and Bengio, Yoshua and Vincent, Pascal and Delalleau, Olivier},
      title     = {Tempered {Markov} chain {Monte} {Carlo} for training of restricted {Boltzmann} machines},
      booktitle = {International Conference on Artificial Intelligence and Statistics ({AISTATS})},
      pages     = {145--152},
      year      = {2010},
    }

  • D. Erhan, A. Courville, and P. Vincent, “Why Does Unsupervised Pre-training Help Deep Learning?,” Journal of Machine Learning Research, vol. 11, iss. 2007, pp. 625-660, 2010.
    [BibTeX]
    @article{Erhan2010,
      author    = {Erhan, Dumitru and Bengio, Yoshua and Courville, Aaron and Manzagol, Pierre-Antoine and Vincent, Pascal and Bengio, Samy},
      title     = {Why Does Unsupervised Pre-training Help Deep Learning?},
      journal   = {Journal of Machine Learning Research},
      volume    = {11},
      pages     = {625--660},
      publisher = {JMLR.org},
      year      = {2010},
    }

  • K. Kavukcuoglu, M. Ranzato, and Y. LeCun, “Fast inference in sparse coding algorithms with applications to object recognition,” 2010.
    [BibTeX]
    @techreport{kavukcuoglu2010fast,
      author        = {Kavukcuoglu, Koray and Ranzato, Marc'Aurelio and LeCun, Yann},
      title         = {Fast inference in sparse coding algorithms with applications to object recognition},
      institution   = {Computational and Biological Learning Lab, Courant Institute, NYU},
      number        = {CBLL-TR-2008-12-01},
      archivePrefix = {arXiv},
      eprint        = {1010.3467},
      arxivId       = {arXiv:1010.3467},
      year          = {2010},
    }

  • A. Krizhevsky, G. E. Hinton, and others, “Factored 3-way restricted Boltzmann machines for modeling natural images,” in International Conference on Artificial Intelligence and Statistics (AISTATS), 2010, pp. 621-628.
    [BibTeX]
    @inproceedings{krizhevsky2010factored,
      author    = {Ranzato, Marc'Aurelio and Krizhevsky, Alex and Hinton, Geoffrey E},
      title     = {Factored 3-way restricted {Boltzmann} machines for modeling natural images},
      booktitle = {International Conference on Artificial Intelligence and Statistics ({AISTATS})},
      pages     = {621--628},
      year      = {2010},
    }

  • M. Ranzato and G. E. Hinton, “Modeling Pixel Means and Covariances using Factorized Third-order Boltzmann Machines,” in 2010 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2010, pp. 2551-2558.
    [BibTeX]
    @inproceedings{ranzato2010modeling,
      author       = {Ranzato, Marc'Aurelio and Hinton, Geoffrey E},
      title        = {Modeling Pixel Means and Covariances using Factorized Third-order {Boltzmann} Machines},
      booktitle    = {2010 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
      pages        = {2551--2558},
      organization = {IEEE},
      year         = {2010},
    }

  • G. W. Taylor, R. Fergus, Y. LeCun, and C. Bregler, “Convolutional learning of spatio-temporal features,” in Proceedings of the European Conference on Computer Vision (ECCV), Springer, 2010, pp. 140-153.
    [BibTeX]
    @incollection{taylor2010convolutional,
      author    = {Taylor, Graham W and Fergus, Rob and LeCun, Yann and Bregler, Christoph},
      title     = {Convolutional learning of spatio-temporal features},
      booktitle = {Proceedings of the European Conference on Computer Vision ({ECCV})},
      series    = {Lecture Notes in Computer Science},
      pages     = {140--153},
      publisher = {Springer},
      year      = {2010},
    }

  • J. Yang, K. Yu, and T. Huang, “Supervised translation-invariant sparse coding,” in 2010 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2010, pp. 3517-3524. doi:10.1109/CVPR.2010.5539958
    [BibTeX]
    @inproceedings{Yang:2010,
      author    = {Yang, Jianchao and Yu, Kai and Huang, Thomas},
      title     = {Supervised translation-invariant sparse coding},
      booktitle = {2010 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
      pages     = {3517--3524},
      month     = jun,
      year      = {2010},
      doi       = {10.1109/CVPR.2010.5539958},
      issn      = {1063-6919},
    }

  • M. D. Zeiler, D. Krishnan, G. W. Taylor, and R. Fergus, “Deconvolutional Networks,” in IEEE Computer Society Conference on Computer Vision and Pattern Recognition (CVPR), 2010, pp. 2528-2535. doi:10.1109/CVPR.2010.5539957
    [BibTeX]
    @inproceedings{Zeiler2010,
      author    = {Zeiler, Matthew D and Krishnan, Dilip and Taylor, Graham W and Fergus, Rob},
      title     = {Deconvolutional Networks},
      booktitle = {IEEE Computer Society Conference on Computer Vision and Pattern Recognition ({CVPR})},
      pages     = {2528--2535},
      publisher = {IEEE},
      month     = jun,
      year      = {2010},
      doi       = {10.1109/CVPR.2010.5539957},
      isbn      = {978-1-4244-6984-0},
    }

2009

  • Y. Bengio and Y. LeCun, “Tutorial: Learning Deep Architectures,” in ICML Workshop on Learning Feature Hierarchies, 2009.
    [BibTeX]
    @inproceedings{Bengio2009,
      author    = {Bengio, Yoshua and LeCun, Yann},
      title     = {Tutorial: Learning Deep Architectures},
      booktitle = {{ICML} Workshop on Learning Feature Hierarchies},
      year      = {2009},
    }

  • Y. Bengio, “Learning Deep Architectures for AI,” Foundations and Trends in Machine Learning, vol. 2, iss. 1, 2009. doi:10.1561/2200000006
    [BibTeX]
    @article{Bengio2009a,
      author  = {Bengio, Yoshua},
      title   = {Learning Deep Architectures for {AI}},
      journal = {Foundations and Trends in Machine Learning},
      volume  = {2},
      number  = {1},
      pages   = {1--127},
      year    = {2009},
      doi     = {10.1561/2200000006},
      issn    = {1935-8237},
    }

  • D. Erhan, P. Manzagol, Y. Bengio, S. Bengio, and P. Vincent, “The difficulty of training deep architectures and the effect of unsupervised pre-training,” in International Conference on Artificial Intelligence and Statistics (AISTATS), 2009, pp. 153-160.
    [BibTeX]
    @inproceedings{erhan2009difficulty,
      author    = {Erhan, Dumitru and Manzagol, Pierre-Antoine and Bengio, Yoshua and Bengio, Samy and Vincent, Pascal},
      title     = {The difficulty of training deep architectures and the effect of unsupervised pre-training},
      booktitle = {International Conference on Artificial Intelligence and Statistics ({AISTATS})},
      pages     = {153--160},
      year      = {2009},
    }

  • I. Goodfellow, H. Lee, Q. V. Le, A. Saxe, and A. Y. Ng, “Measuring invariances in deep networks,” in Advances in Neural Information Processing Systems (NIPS), 2009, pp. 646-654.
    [BibTeX]
    @inproceedings{goodfellow2009measuring,
      author    = {Goodfellow, Ian and Lee, Honglak and Le, Quoc V and Saxe, Andrew and Ng, Andrew Y},
      title     = {Measuring invariances in deep networks},
      booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
      pages     = {646--654},
      year      = {2009},
    }

  • K. Jarrett, K. Kavukcuoglu, M. A. Ranzato, and Y. LeCun, “What is the best multi-stage architecture for object recognition?,” in IEEE 12th International Conference on Computer Vision (ICCV), 2009, pp. 2146-2153.
    [BibTeX]
    @inproceedings{Jarrett2009,
      author    = {Jarrett, Kevin and Kavukcuoglu, Koray and Ranzato, Marc'Aurelio and LeCun, Yann},
      title     = {What is the best multi-stage architecture for object recognition?},
      booktitle = {IEEE 12th International Conference on Computer Vision ({ICCV})},
      pages     = {2146--2153},
      year      = {2009},
    }

  • K. Kavukcuoglu, M. Ranzato, R. Fergus, and Y. LeCun, “Learning invariant features through topographic filter maps,” in IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2009, pp. 1605-1612.
    [BibTeX]
    @inproceedings{kavukcuoglu2009learning,
      author       = {Kavukcuoglu, Koray and Ranzato, Marc'Aurelio and Fergus, Rob and LeCun, Yann},
      title        = {Learning invariant features through topographic filter maps},
      booktitle    = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
      pages        = {1605--1612},
      organization = {IEEE},
      year         = {2009},
    }

  • H. Larochelle, Y. Bengio, J. Louradour, and P. Lamblin, “Exploring strategies for training deep neural networks,” The Journal of Machine Learning Research, vol. 10, pp. 1-40, 2009.
    [BibTeX]
    @article{larochelle2009exploring,
      author    = {Larochelle, Hugo and Bengio, Yoshua and Louradour, J{\'e}r{\^o}me and Lamblin, Pascal},
      title     = {Exploring strategies for training deep neural networks},
      journal   = {The Journal of Machine Learning Research},
      volume    = {10},
      pages     = {1--40},
      publisher = {JMLR.org},
      year      = {2009},
    }

  • H. Lee, R. Grosse, R. Ranganath, and A. Y. Ng, “Convolutional deep belief networks for scalable unsupervised learning of hierarchical representations,” in Proceedings of the $26^{th}$ Annual International Conference on Machine Learning (ICML), 2009, pp. 609-616.
    [BibTeX]
    @inproceedings{lee2009convolutional,
      author       = {Lee, Honglak and Grosse, Roger and Ranganath, Rajesh and Ng, Andrew Y},
      title        = {Convolutional deep belief networks for scalable unsupervised learning of hierarchical representations},
      booktitle    = {Proceedings of the $26^{th}$ Annual International Conference on Machine Learning ({ICML})},
      pages        = {609--616},
      organization = {ACM},
      year         = {2009},
    }

  • H. Lee, R. Raina, A. Teichman, and A. Y. Ng, “Exponential Family Sparse Coding with Application to Self-taught Learning,” in Proceedings of the International Joint Conference on Artificial Intelligence (IJCAI), 2009, pp. 1113-1119.
    [BibTeX]
    @inproceedings{lee2009exponential,
      author    = {Lee, Honglak and Raina, Rajat and Teichman, Alex and Ng, Andrew Y},
      title     = {Exponential Family Sparse Coding with Application to Self-taught Learning},
      booktitle = {Proceedings of the International Joint Conference on Artificial Intelligence ({IJCAI})},
      pages     = {1113--1119},
      year      = {2009},
    }

  • R. Salakhutdinov and G. E. Hinton, “Deep Boltzmann Machines,” in International Conference on Artificial Intelligence and Statistics (AISTATS), 2009, pp. 448-455.
    [BibTeX]
    @inproceedings{salakhutdinov2009deep,
      author    = {Salakhutdinov, Ruslan and Hinton, Geoffrey E},
      title     = {Deep {Boltzmann} Machines},
      booktitle = {International Conference on Artificial Intelligence and Statistics ({AISTATS})},
      pages     = {448--455},
      year      = {2009},
    }

  • J. Yang, K. Yu, Y. Gong, and T. Huang, “Linear spatial pyramid matching using sparse coding for image classification,” in IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2009, pp. 1794-1801.
    [BibTeX]
    @inproceedings{yang2009linear,
      author    = {Yang, Jianchao and Yu, Kai and Gong, Yihong and Huang, Thomas},
      title     = {Linear spatial pyramid matching using sparse coding for image classification},
      booktitle = {IEEE Conference on Computer Vision and Pattern Recognition ({CVPR})},
      pages     = {1794--1801},
      year      = {2009},
    }

  • K. Yu, T. Zhang, and Y. Gong, “Nonlinear Learning using Local Coordinate Coding,” in Advances in Neural Information Processing Systems (NIPS), 2009, pp. 2223-2231.
    [BibTeX]
    @inproceedings{yu2009nonlinear,
      author    = {Yu, Kai and Zhang, Tong and Gong, Yihong},
      title     = {Nonlinear Learning using Local Coordinate Coding},
      booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
      pages     = {2223--2231},
      year      = {2009}
    }

2008

  • R. Collobert and J. Weston, “A Unified Architecture for Natural Language Processing: Deep Neural Networks with Multitask Learning,” in Proceedings of the 25th International Conference on Machine Learning, 2008, pp. 160-167.
    [BibTeX]
    @inproceedings{collobert2008unified,
      author       = {Collobert, Ronan and Weston, Jason},
      title        = {A Unified Architecture for Natural Language Processing: Deep Neural Networks with Multitask Learning},
      booktitle    = {Proceedings of the 25th International Conference on Machine Learning},
      pages        = {160--167},
      year         = {2008},
      organization = {ACM}
    }

  • G. Desjardins and Y. Bengio, “Empirical Evaluation of Convolutional RBMs for Vision,” Département d’Informatique et de Recherche Opérationnelle, Université de Montréal, 1327, 2008.
    [BibTeX] [Abstract]

    Convolutional Neural Networks (CNN) have had great success in machine learning tasks involving vision and represent one of the early successes of deep networks. Local receptive fields and weight sharing make their architecture ideally suited for vision tasks by helping to enforce a prior based on our knowledge of natural images. This same prior could also be applied to recent developments in the field of deep networks, in order to tailor these new architectures for artificial vision. In this context, we show how the Restricted Boltzmann Machine (RBM), the building block of Deep Belief Networks (DBN), can be adapted to operate in a convolutional manner. We compare their performance to standard fully-connected RBMs on a simple visual learning task and show that the convolutional RBMs (CRBMs) converge to smaller values of the negative likelihood function. Our experiments also indicate that CRBMs are more efficient than standard RBMs trained on small image patches, with the CRBMs having faster convergence.

    @techreport{Desjardins2008,
      author      = {Desjardins, Guillaume and Bengio, Yoshua},
      title       = {Empirical Evaluation of Convolutional {RBMs} for Vision},
      institution = {D{\'{e}}partement d'Informatique et de Recherche Op{\'{e}}rationnelle, Universit{\'{e}} de Montr{\'{e}}al},
      number      = {1327},
      year        = {2008},
      keywords    = {Convolutional Architectures, Deep Networks, RBM, Vision},
      abstract    = {Convolutional Neural Networks (CNN) have had great success in machine learning tasks involving vision and represent one of the early successes of deep networks. Local receptive fields and weight sharing make their architecture ideally suited for vision tasks by helping to enforce a prior based on our knowledge of natural images. This same prior could also be applied to recent developments in the field of deep networks, in order to tailor these new architectures for artificial vision. In this context, we show how the Restricted Boltzmann Machine (RBM), the building block of Deep Belief Networks (DBN), can be adapted to operate in a convolutional manner. We compare their performance to standard fully-connected RBMs on a simple visual learning task and show that the convolutional RBMs (CRBMs) converge to smaller values of the negative likelihood function. Our experiments also indicate that CRBMs are more efficient than standard RBMs trained on small image patches, with the CRBMs having faster convergence.}
    }

  • A. Graves and J. Schmidhuber, “Offline handwriting recognition with multidimensional recurrent neural networks,” in Advances in Neural Information Processing Systems, 2008, pp. 545-552.
    [BibTeX]
    @inproceedings{graves2008offline,
      author    = {Graves, Alex and Schmidhuber, J{\"u}rgen},
      title     = {Offline handwriting recognition with multidimensional recurrent neural networks},
      booktitle = {Advances in Neural Information Processing Systems},
      pages     = {545--552},
      year      = {2008}
    }

  • R. Salakhutdinov and I. Murray, “On the quantitative analysis of deep belief networks,” in Proceedings of the 25th International Conference on Machine Learning, 2008, pp. 872-879.
    [BibTeX]
    @inproceedings{salakhutdinov2008quantitative,
      author       = {Salakhutdinov, Ruslan and Murray, Iain},
      title        = {On the quantitative analysis of deep belief networks},
      booktitle    = {Proceedings of the 25th International Conference on Machine Learning},
      pages        = {872--879},
      year         = {2008},
      organization = {ACM}
    }

  • P. Vincent, H. Larochelle, Y. Bengio, and P. Manzagol, “Extracting and Composing Robust Features with Denoising Autoencoders,” in Proceedings of the 25th International Conference on Machine Learning, 2008, pp. 1096-1103.
    [BibTeX]
    @inproceedings{vincent2008extracting,
      author       = {Vincent, Pascal and Larochelle, Hugo and Bengio, Yoshua and Manzagol, Pierre-Antoine},
      title        = {Extracting and Composing Robust Features with Denoising Autoencoders},
      booktitle    = {Proceedings of the 25th International Conference on Machine Learning},
      pages        = {1096--1103},
      year         = {2008},
      organization = {ACM}
    }

2007

  • Y. Bengio, P. Lamblin, D. Popovici, and H. Larochelle, “Greedy layer-wise training of deep networks,” in Advances in Neural Information Processing Systems (NIPS), 2007, vol. 19, pp. 153-160.
    [BibTeX]
    @inproceedings{bengio2007greedy,
      author    = {Bengio, Yoshua and Lamblin, Pascal and Popovici, Dan and Larochelle, Hugo},
      title     = {Greedy layer-wise training of deep networks},
      booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
      volume    = {19},
      pages     = {153--160},
      year      = {2007},
      publisher = {MIT Press}
    }

  • Y. Boureau, Y. LeCun, and others, “Sparse feature learning for deep belief networks,” in Advances in Neural Information Processing Systems, 2007, pp. 1185-1192.
    [BibTeX]
    @inproceedings{boureau2007sparse,
      author    = {Boureau, Y-Lan and LeCun, Yann and others},
      title     = {Sparse feature learning for deep belief networks},
      booktitle = {Advances in Neural Information Processing Systems},
      pages     = {1185--1192},
      year      = {2007}
    }

  • G. E. Hinton, “Learning multiple layers of representation,” Trends in Cognitive Sciences, vol. 11, iss. 10, pp. 428-434, 2007.
    [BibTeX]
    @article{hinton2007learning,
      author    = {Hinton, Geoffrey E},
      title     = {Learning multiple layers of representation},
      journal   = {Trends in Cognitive Sciences},
      volume    = {11},
      number    = {10},
      pages     = {428--434},
      year      = {2007},
      publisher = {Elsevier}
    }

  • G. E. Hinton, “To recognize shapes, first learn to generate images,” in Computational Neuroscience: Theoretical Insights into Brain Function, P. Cisek, T. Drew, and J. F. Kalaska, Eds., Elsevier, 2007, vol. 165, pp. 535-547. doi:10.1016/S0079-6123(06)65034-6
    [BibTeX]
    @incollection{hinton2007recognize,
      author    = {Hinton, Geoffrey E},
      title     = {To recognize shapes, first learn to generate images},
      editor    = {Cisek, Paul and Drew, Trevor and Kalaska, John F.},
      booktitle = {Computational Neuroscience: Theoretical Insights into Brain Function},
      series    = {Progress in Brain Research},
      volume    = {165},
      pages     = {535--547},
      publisher = {Elsevier},
      year      = {2007},
      issn      = {0079-6123},
      doi       = {10.1016/S0079-6123(06)65034-6},
    }

  • H. Lee, C. Ekanadham, and A. Ng, “Sparse deep belief net model for visual area V2,” in Advances in Neural Information Processing Systems (NIPS), 2007, pp. 873-880.
    [BibTeX]
    @inproceedings{lee2007sparse,
      author    = {Lee, Honglak and Ekanadham, Chaitanya and Ng, Andrew},
      title     = {Sparse deep belief net model for visual area {V2}},
      booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
      pages     = {873--880},
      year      = {2007}
    }

  • R. Raina, A. Battle, H. Lee, B. Packer, and A. Y. Ng, “Self-taught Learning: Transfer Learning from Unlabeled Data,” in Proceedings of the 24th International Conference on Machine Learning, 2007, pp. 759-766.
    [BibTeX]
    @inproceedings{raina2007self,
      author       = {Raina, Rajat and Battle, Alexis and Lee, Honglak and Packer, Benjamin and Ng, Andrew Y},
      title        = {Self-taught Learning: Transfer Learning from Unlabeled Data},
      booktitle    = {Proceedings of the 24th International Conference on Machine Learning},
      pages        = {759--766},
      year         = {2007},
      organization = {ACM}
    }

2006

  • A. Graves, S. Fernández, F. Gomez, and J. Schmidhuber, “Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks,” in Proceedings of the 23rd International Conference on Machine Learning, 2006, pp. 369-376.
    [BibTeX]
    @inproceedings{graves2006connectionist,
      author       = {Graves, Alex and Fern{\'a}ndez, Santiago and Gomez, Faustino and Schmidhuber, J{\"u}rgen},
      title        = {Connectionist temporal classification: labelling unsegmented sequence data with recurrent neural networks},
      booktitle    = {Proceedings of the 23rd International Conference on Machine Learning},
      pages        = {369--376},
      year         = {2006},
      organization = {ACM}
    }

  • G. E. Hinton, S. Osindero, and Y. Teh, “A fast learning algorithm for deep belief nets,” Neural Computation, vol. 18, iss. 7, pp. 1527-1554, 2006.
    [BibTeX]
    @article{hinton2006fast,
      author    = {Hinton, Geoffrey E and Osindero, Simon and Teh, Yee-Whye},
      title     = {A fast learning algorithm for deep belief nets},
      journal   = {Neural Computation},
      volume    = {18},
      number    = {7},
      pages     = {1527--1554},
      year      = {2006},
      publisher = {MIT Press}
    }

  • G. E. Hinton and R. R. Salakhutdinov, “Reducing the dimensionality of data with neural networks,” Science, vol. 313, iss. 5786, pp. 504-507, 2006.
    [BibTeX]
    @article{hinton2006reducing,
      author    = {Hinton, Geoffrey E and Salakhutdinov, Ruslan R},
      title     = {Reducing the dimensionality of data with neural networks},
      journal   = {Science},
      volume    = {313},
      number    = {5786},
      pages     = {504--507},
      year      = {2006},
      publisher = {American Association for the Advancement of Science}
    }

  • H. Lee, A. Battle, R. Raina, and A. Ng, “Efficient sparse coding algorithms,” in Advances in Neural Information Processing Systems (NIPS), 2006, pp. 801-808.
    [BibTeX]
    @inproceedings{lee2006efficient,
      author    = {Lee, Honglak and Battle, Alexis and Raina, Rajat and Ng, Andrew},
      title     = {Efficient sparse coding algorithms},
      booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
      pages     = {801--808},
      year      = {2006}
    }

  • G. W. Taylor, G. E. Hinton, and S. T. Roweis, “Modeling Human Motion using Binary Latent Variables,” in Advances in Neural Information Processing Systems (NIPS), 2006, pp. 1345-1352.
    [BibTeX]
    @inproceedings{taylor2006modeling,
      author    = {Taylor, Graham W and Hinton, Geoffrey E and Roweis, Sam T},
      title     = {Modeling Human Motion using Binary Latent Variables},
      booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
      pages     = {1345--1352},
      year      = {2006}
    }

2005

  • M. van de Giessen and J. Schmidhuber, “Fast color-based object recognition independent of position and orientation,” in Artificial Neural Networks: Biological Inspirations–International Conference on Artificial Neural networks (ICANN), Springer, 2005, pp. 469-474.
    [BibTeX]
    @incollection{van2005fast,
      author    = {van de Giessen, Martijn and Schmidhuber, J{\"u}rgen},
      title     = {Fast color-based object recognition independent of position and orientation},
      booktitle = {Artificial Neural Networks: Biological Inspirations--International Conference on Artificial Neural networks (ICANN)},
      pages     = {469--474},
      year      = {2005},
      publisher = {Springer}
    }

1996

  • B. Olshausen and D. Field, “Emergence of simple-cell receptive field properties by learning a sparse code for natural images,” Nature, vol. 381, pp. 607-609, 1996.
    [BibTeX]
    @article{Olshausen1996,
      author  = {Olshausen, Bruno and Field, David},
      title   = {Emergence of simple-cell receptive field properties by learning a sparse code for natural images},
      journal = {Nature},
      volume  = {381},
      pages   = {607--609},
      year    = {1996}
    }

1994

  • D. L. Ruderman, “The statistics of natural images,” Network: Computation in Neural Systems, vol. 5, iss. 4, pp. 517-548, 1994.
    [BibTeX]
    @article{ruderman1994statistics,
      author    = {Ruderman, Daniel L},
      title     = {The statistics of natural images},
      journal   = {Network: Computation in Neural Systems},
      volume    = {5},
      number    = {4},
      pages     = {517--548},
      year      = {1994},
      publisher = {Informa UK Ltd UK}
    }

1989

  • Y. LeCun, B. Boser, J. S. Denker, D. Henderson, R. E. Howard, W. Hubbard, and L. D. Jackel, “Backpropagation applied to handwritten zip code recognition,” Neural Computation, vol. 1, iss. 4, pp. 541-551, 1989.
    [BibTeX]
    @article{lecun1989backpropagation,
      author    = {LeCun, Yann and Boser, Bernhard and Denker, John S and Henderson, Donnie and Howard, Richard E and Hubbard, Wayne and Jackel, Lawrence D},
      title     = {Backpropagation applied to handwritten zip code recognition},
      journal   = {Neural Computation},
      volume    = {1},
      number    = {4},
      pages     = {541--551},
      year      = {1989},
      publisher = {MIT Press}
    }

  • Y. Bengio, N. Boulanger-Lewandowski, and R. Pascanu, “Advances in optimizing recurrent networks.”
    [BibTeX]
    @article{BengioAdvances,
      author        = {Bengio, Yoshua and Boulanger-Lewandowski, Nicolas and Pascanu, Razvan},
      title         = {Advances in optimizing recurrent networks},
      archivePrefix = {arXiv},
      arxivId       = {1212.0901v2},
      eprint        = {1212.0901v2},
      year          = {2012}
    }

  • C. Shen, M. Song, and Z. Qi, in NIPS Workshop on Deep Learning and Unsupervised Feature Learning.
    [BibTeX]
    @inproceedings{Shen2012,
      author    = {Shen, Chengyao and Song, Mingli and Qi, Zhao},
      title     = {Learning High-Level Concepts by Training a Deep Network on Eye Fixations},
      booktitle = {{NIPS} Workshop on Deep Learning and Unsupervised Feature Learning},
      year      = {2012}
    }

 

Disclaimer: The list might contain errors, especially those due to erroneous BibTeX citations found on different online libraries. Please report these to us.

Bibliography collated by Woo-Sup Han.