% Conference publications, 2013--2014.
% Citation keys are the numeric record ids of the originating database and
% are kept unchanged so existing \cite commands continue to resolve.
% Months use the standard BibTeX macros (jan..dec) so the bibliography style
% decides how they are printed.

@inproceedings{130,
  author    = {Collins, Tom and B{\"o}ck, Sebastian and Krebs, Florian and Widmer, Gerhard},
  title     = {Bridging the Audio-Symbolic Gap: The Discovery of Repeated Note Content Directly from Polyphonic Music Audio},
  booktitle = {53rd AES Conference on Semantic Audio},
  year      = {2014},
  month     = jan,
  address   = {London, UK},
}

@inproceedings{131,
  author    = {Arzt, Andreas and B{\"o}ck, Sebastian and Flossmann, Sebastian and Frostel, Harald and Gasser, Martin and Widmer, Gerhard},
  title     = {The Complete Classical Music Companion V0.9},
  booktitle = {53rd AES Conference on Semantic Audio},
  year      = {2014},
  month     = jan,
  address   = {London, UK},
}

@inproceedings{129,
  author    = {Widmer, Gerhard},
  title     = {What Really Moves Us in Music: Expressivity as a Challenge to Semantic Audio Research},
  booktitle = {53rd AES Conference on Semantic Audio},
  year      = {2014},
  month     = jan,
  address   = {London, UK},
}

@inproceedings{113,
  author    = {Grachten, Maarten and Gasser, Martin and Arzt, Andreas and Widmer, Gerhard},
  title     = {Automatic alignment of music performances with structural differences},
  booktitle = {Proceedings of the 14th International Society for Music Information Retrieval Conference (ISMIR 2013)},
  year      = {2013},
  month     = nov,
  address   = {Curitiba, Brazil},
  abstract  = {Both in interactive music listening, and in music performance research, there is a need for automatic alignment of different recordings of the same musical piece. This task is challenging, because musical pieces often contain parts that may or may not be repeated by the performer, possibly leading to structural differences between performances (or between performance and score). The most common alignment method, dynamic time warping (DTW), cannot handle structural differences adequately, and existing approaches to deal with structural differences explicitly rely on the annotation of {\textquoteleft}{\textquoteleft}break points{\textquoteright}{\textquoteright} in one of the sequences. We propose a simple extension of the Needleman-Wunsch algorithm to deal effectively with structural differences, without relying on annotations. We evaluate several audio features for alignment, and show how an optimal value can be found for the cost-parameter of the alignment algorithm. A single cost value is demonstrated to be valid across different types of music. We demonstrate that our approach yields roughly equal alignment accuracies compared to DTW in the absence of structural differences, and superior accuracies when structural differences occur.},
}

@inproceedings{110,
  author    = {B{\"o}ck, Sebastian and Schl{\"u}ter, Jan and Widmer, Gerhard},
  title     = {Enhanced peak picking for onset detection with recurrent neural networks},
  booktitle = {Proceedings of the 6th International Workshop on Machine Learning and Music},
  year      = {2013},
  month     = sep,
  address   = {Prague, Czech Republic},
  keywords  = {onset detection, peak-picking},
  abstract  = {We present a new neural network based peak-picking algorithm for common onset detection functions. Compared to existing hand-crafted methods it yields a better performance and leads to a much lower number of false negative detections. The performance is evaluated on basis of a huge dataset with over 25k annotated onsets and shows a significant improvement over existing methods in cases of signals with previously unknown levels.},
}

% NOTE(review): the abstract of entry 109 is byte-identical to the abstract of
% entry 88 (the SuperFlux / maximum-filter paper). This looks like a copy-paste
% in the originating database -- confirm against the published paper before
% relying on it.
@inproceedings{109,
  author    = {B{\"o}ck, Sebastian and Widmer, Gerhard},
  title     = {Local Group Delay based Vibrato and Tremolo Suppression for Onset Detection},
  booktitle = {Proceedings of the 14th International Society for Music Information Retrieval Conference (ISMIR 2013)},
  year      = {2013},
  month     = nov,
  address   = {Curitiba, Brazil},
  keywords  = {local group delay, onset detection, tremolo suppression, vibrato suppression},
  abstract  = {We present SuperFlux - a new onset detection algorithm with vibrato suppression. It is an enhanced version of the universal spectral flux onset detection algorithm, and reduces the number of false positive detections considerably by tracking spectral trajectories with a maximum filter. Especially for music with heavy use of vibrato (e.g., sung operas or string performances), the number of false positive detections can be reduced by up to 60\% without missing any additional events. Algorithm performance was evaluated and compared to state-of-the-art methods on the basis of three different datasets comprising mixed audio material (25,927 onsets), violin recordings (7,677 onsets) and operatic solo voice recordings (1,448 onsets). Due to its causal nature, the algorithm is applicable in both offline and online real-time scenarios.},
}

@inproceedings{88,
  author    = {B{\"o}ck, Sebastian and Widmer, Gerhard},
  title     = {Maximum Filter Vibrato Suppression for Onset Detection},
  booktitle = {Proceedings of the 16th International Conference on Digital Audio Effects (DAFx-13)},
  year      = {2013},
  month     = sep,
  address   = {Maynooth, Ireland},
  keywords  = {maximum filter, onset detection, vibrato suppression},
  abstract  = {We present SuperFlux - a new onset detection algorithm with vibrato suppression. It is an enhanced version of the universal spectral flux onset detection algorithm, and reduces the number of false positive detections considerably by tracking spectral trajectories with a maximum filter. Especially for music with heavy use of vibrato (e.g., sung operas or string performances), the number of false positive detections can be reduced by up to 60\% without missing any additional events. Algorithm performance was evaluated and compared to state-of-the-art methods on the basis of three different datasets comprising mixed audio material (25,927 onsets), violin recordings (7,677 onsets) and operatic solo voice recordings (1,448 onsets). Due to its causal nature, the algorithm is applicable in both offline and online real-time scenarios.},
}

@inproceedings{89,
  author    = {Korzeniowski, Filip and Widmer, Gerhard},
  title     = {Refined Spectral Template Models for Score Following},
  booktitle = {Proceedings of the Sound and Music Computing Conference (SMC)},
  year      = {2013},
  address   = {Stockholm, Sweden},
  abstract  = {Score followers often use spectral templates for notes and chords to estimate the similarity between positions in the score and the incoming audio stream. Here, we propose two methods on different modelling levels to improve the quality of these templates, and subsequently the quality of the alignment. The first method focuses on creating more informed templates for individual notes. This is achieved by estimating the template based on synthesised sounds rather than generic Gaussian mixtures, as used in current state-of-the-art systems. The second method introduces an advanced approach to aggregate individual note templates into spectral templates representing a specific score position. In contrast to score chordification, the common procedure used by score followers to deal with polyphonic scores, we use weighting functions to weight notes, observing their temporal relationships. We evaluate both methods against a dataset of classical piano music to show their positive impact on the alignment quality.},
}

% Entry 116 is a single paper in the ISMIR 2013 proceedings, so it is typed as
% @inproceedings with the venue in booktitle (not @proceedings + journal).
% The address matches the other entries in this file for the same conference.
@inproceedings{116,
  author    = {Krebs, Florian and B{\"o}ck, Sebastian and Widmer, Gerhard},
  title     = {Rhythmic Pattern Modeling for Beat and Downbeat Tracking in Musical Audio},
  booktitle = {Proceedings of the 14th International Society for Music Information Retrieval Conference (ISMIR 2013)},
  year      = {2013},
  month     = nov,
  address   = {Curitiba, Brazil},
  abstract  = {Rhythmic patterns are an important structural element in music. This paper investigates the use of rhythmic pattern modeling to infer metrical structure in musical audio recordings. We present a Hidden Markov Model (HMM) based system that simultaneously extracts beats, downbeats, tempo, meter, and rhythmic patterns. Our model builds upon the basic structure proposed by Whiteley et al., which we further modified by introducing a new observation model: rhythmic patterns are learned directly from data, which makes the model adaptable to the rhythmical structure of any kind of music. For learning rhythmic patterns and evaluating beat and downbeat tracking, 697 ballroom dance pieces were annotated with beat and measure information. The results showed that explicitly modeling rhythmic patterns of dance styles drastically reduces octave errors (detection of half or double tempo) and substantially improves downbeat tracking.},
}

@inproceedings{90,
  author    = {Korzeniowski, Filip and Krebs, Florian and Arzt, Andreas and Widmer, Gerhard},
  title     = {Tracking Rests and Tempo Changes: Improved Score Following with Particle Filters},
  booktitle = {Proceedings of the International Computer Music Conference (ICMC)},
  year      = {2013},
  address   = {Perth, Australia},
  abstract  = {In this paper we present a score following system based on a Dynamic Bayesian Network, using particle filtering as inference method. The proposed model sets itself apart from existing approaches by including two new extensions: A multi-level tempo model to improve alignment quality of performances with challenging tempo changes, and an extension to reflect different expressive characteristics of notated rests. Both extensions are evaluated against a dataset of classical piano music. As the results show, the extensions improve both the accuracy and the robustness of the algorithm.},
}