% Journal article: Cherifi and Guerti, Archives of Acoustics 46(2), 2021.
% Field values are stored in canonical form (bare volume/issue numbers,
% en-dash page range, bare DOI) so that the bibliography style controls
% presentation. The citation key is kept exactly as exported so existing
% citations continue to resolve.
@article{Cherifi_El-Hadi_Conditional_2021,
  author    = {Cherifi, El-Hadi and Guerti, Mhania},
  title     = {Conditional Random Fields Applied to {Arabic} Orthographic-Phonetic Transcription},
  journal   = {Archives of Acoustics},
  volume    = {46},
  number    = {2},
  pages     = {237--247},
  year      = {2021},
  publisher = {Polish Academy of Sciences, Institute of Fundamental Technological Research, Committee on Acoustics},
  doi       = {10.24425/aoa.2021.136574},
  url       = {http://journals.pan.pl/Content/119918/aoa.2021.136574.pdf},
  keywords  = {Orthographic-To-Phonetic Transcription, Conditional Random Fields, text-to-speech, Arabic speech synthesis, Modern Standard Arabic},
  abstract  = {Orthographic-To-Phonetic (O2P) Transcription is the process of learning the relationship between the written word and its phonetic transcription. It is a necessary part of Text-To-Speech (TTS) systems and it plays an important role in handling Out-Of-Vocabulary (OOV) words in Automatic Speech Recognition systems. The O2P is a complex task, because for many languages, the correspondence between the orthography and its phonetic transcription is not completely consistent. Over time, the techniques used to tackle this problem have evolved, from earlier rules based systems to the current more sophisticated machine learning approaches. In this paper, we propose an approach for Arabic O2P Conversion based on a probabilistic method: Conditional Random Fields (CRF). We discuss the results and experiments of this method apply on a pronunciation dictionary of the Most Commonly used Arabic Words, a database that we called (MCAW-Dic). MCAW-Dic contains over 35 000 words in Modern Standard Arabic (MSA) and their pronunciation, a database that we have developed by ourselves assisted by phoneticians and linguists from the University of Tlemcen. The results achieved are very satisfactory and point the way towards future innovations. Indeed, in all our tests, the score was between 11 and 15\% error rate on the transcription of phonemes (Phoneme Error Rate). We could improve this result by including a large context, but in this case, we encountered memory limitations and calculation difficulties.},
}