##############################################################################################################
## This script opens all sound files in a directory, one by one, and checks whether a TextGrid with the same name 
## already exists. If it doesn't, it creates a TextGrid with two tiers and automatically inserts two boundaries 
## in order to limit further analysis to a specific part of the signal. Then, each sound file and associated TextGrid is
## opened into the editor where these boundaries have to be adjusted by the user. 
## Then, the script performs an automatic segmentation into pseudo-syllables (based on intensity) within these boundaries.
## Once again, each sound file and associated TextGrid is opened into the editor so that the segmentation may be adjusted by the user.
## Finally, the script isolates an intensity plateau and its middle point, the intensity peak, within each pseudo-syllable.
## Once it goes through all the files in the directory, it measures duration of the pseudo-syllables and intensity plateaus, 
## as well as distance between intensity peaks (Inter-syllable intervals, EIS) and writes the measures into a result file.
##
## Part of the script is based on the "markbysyllable" script from http://www.praatvocaltoolkit.com/index.html
## The script also uses procedure split written by Atria (2014).
## The script was written for analyzing data for project MonPaGe. November 2016.
##############################################################################################################

# Dialog shown when the script starts. It collects the working directory, the sound file
# extension, the tier names of newly created TextGrids and the base name of the result file.
# The values are read later in the script as directory$, filetype$, TierName$ and resultfile$.
form Enter directory
# Directory: full path to the directory where the recordings are stored.
# IMPORTANT: the path must END WITH A SLASH (backslash \ on Windows, forward slash / on Mac),
# because the script concatenates it directly with file names.
# IMPORTANT: the TextGrid tier names have to be enclosed in " ".
# The IMPORTANT_READ field only displays a reminder; its value is not read by the script.
# The later steps address tiers by number and expect exactly two tiers at creation time
# (tier 1 = pseudo-syllables, last tier = comments).
# Resultfile is the base name of the result file; ".txt" is appended by the script.
	sentence IMPORTANT_READ Insert \ (for Windows) or / (for Mac) at the end of directory name.
	sentence Directory 
	sentence Filetype wav
	sentence TierName "Pseudo-syllables Comments"
	sentence Resultfile Results_SpeechRate_Text
endform

#####################################################################################################################
## This part of the script creates, for every sound file that has no TextGrid yet, a TextGrid
## with the tiers given in the form. Two provisional boundaries are inserted on tier 1, which
## cuts it into three intervals; the middle interval is the part of the signal that the later
## steps analyse. The user adjusts both boundaries in the next step.

# Make a list of all sound files in the directory.
Create Strings as file list... fileList 'directory$'*.'filetype$'

# Loop through all files.
numberOfFiles = Get number of strings

for i from 1 to numberOfFiles

	# Load the i-th sound file of the list.
	select Strings fileList
	soundFile$ = Get string... i
	Read from file... 'directory$''soundFile$'

	# fileName$ is the name of the Sound object (the file name without extension).
	fileName$ = selected$ ("Sound")

	# Get total duration of the sound file
	totalDuration = Get total duration

	# An existing TextGrid is never overwritten, so earlier manual work is kept.
	textgrid_name$ = directory$ + fileName$ + ".TextGrid"
	if not fileReadable (textgrid_name$)

		# Create a TextGrid for the selected sound file.
		To TextGrid... 'TierName$'

		# Provisional limits: 1.000 s and 0.200 s before the end of the sound.
		# For sounds of 1 s or less the first limit would fall outside the time domain
		# and abort the whole batch, so the sound is split into thirds instead.
		if totalDuration > 1
			firstBoundary = totalDuration - 1.000
			secondBoundary = totalDuration - 0.200
		else
			firstBoundary = totalDuration / 3
			secondBoundary = 2 * totalDuration / 3
		endif
		Insert boundary: 1, firstBoundary
		Insert boundary: 1, secondBoundary

		# TextGrid is saved with the same filename and in the same directory as the sound files.
		# Only the TextGrid is selected, so a plain TextGrid file is written.
		select TextGrid 'fileName$'
		Write to text file... 'directory$''fileName$'.TextGrid

	endif

	# Remove everything except the file list before going on to the next file.
	select all
	minus Strings fileList
	Remove
endfor

############################################################################
## This part of the script gives user a hand to make changes to previous boundaries.
# Collect the sound files of the working directory.
Create Strings as file list... fileList 'directory$'*.'filetype$'
numberOfFiles = Get number of strings

for iFile from 1 to numberOfFiles

	# Load the sound whose name is at position iFile in the list and keep its object id.
	select Strings fileList
	soundFile$ = Get string... iFile
	sound = Read from file... 'directory$''soundFile$'

	# fileName$ is the name of the Sound object; the TextGrid on disk carries the same name.
	fileName$ = selected$ ("Sound")
	textgrid = Read from file... 'directory$''fileName$'.TextGrid

	# Open TextGrid and Sound together in the editor and wait for the user.
	# The script resumes when the user clicks Continue in the pause window.
	select textgrid
	plus sound
	Edit
	pause  Adjust syllable boundaries 

	# Drop the Sound from the selection so that only the TextGrid is written back,
	# under the same name and in the same directory as the sound file.
	minus sound
	Write to text file... 'directory$''fileName$'.TextGrid

	# Clear the object list (except the file list) before the next pair.
	select all
	minus Strings fileList
	Remove
endfor

############################################################
## This part of the script sets intervals based on the "markbysyllable" script from http://www.praatvocaltoolkit.com/index.html
## For every sound it computes an intensity contour and inserts boundaries on tier 1 of the
## matching TextGrid, but only between the two limits set by the user in the previous step.
# Make a list of all sound files in the directory.
Create Strings as file list... fileList 'directory$'*.'filetype$'

# Loop through all files.
numberOfFiles = Get number of strings

for k from 1 to numberOfFiles

	# Load the k-th sound file of the list.
	select Strings fileList
	soundFile$ = Get string... k
	Read from file... 'directory$''soundFile$'

	# fileName$ (string) is the name of the Sound object.
	fileName$ = selected$ ("Sound")

	# Read the matching TextGrid, then go back to the Sound, which is processed first.
	Read from file... 'directory$''fileName$'.TextGrid
	select Sound 'fileName$'

	# fileName (numeric) is the object id of the Sound; it is a different variable from fileName$.
	fileName = selected ("Sound")
	Subtract mean
	# tmp is a filtered copy of the sound. NOTE(review): the arguments presumably mean
	# "from 60 Hz, no upper limit, 20 Hz smoothing" - confirm against the Praat manual.
	tmp = Filter (pass Hann band)... 60 0 20
	# clip is the absolute extremum of the filtered copy, rounded to 4 decimals below.
	clip = Get absolute extremum... 0 0 None
		if clip = undefined
			clip = 0
		endif
			clip = 'clip:4'
		# Rescale the filtered copy only if its extremum reaches 1.
		if clip >= 1
			Scale... 0.9999
		endif
		# Overwrite the samples of the loaded Sound object with those of the filtered copy,
		# then discard the copy. The sound file on disk is not modified.
		select fileName
		Formula... Object_'tmp'[]
		select tmp
		Remove
		select fileName
		# Make the time domain start at 0 if it does not already.
		stt = Get start time
			if stt <> 0
				dur = Get total duration
				Scale times to... 0 dur
			endif
		Scale... 0.9999
		dur = Get total duration
		# Intensity contour, converted to a table with one row per intensity point.
		intensity = To Intensity... 40 0.05 0
		intensitytier = Down to IntensityTier
		tableofreal = Down to TableOfReal
		pnts = Get number of rows
		# Copy the table into the variables pnt1..pntN (column 1, used below as a time)
		# and db1..dbN (column 2, used below as an intensity value).
		for i from 1 to pnts
			pnt'i' = Get value... 'i' 1
			db'i' = Get value... 'i' 2
		endfor
		# thld: intensity threshold; mrgn: margin used in the local-dip test below.
		thld = 40
		mrgn = 3
		# The analysed stretch is interval 2 of tier 1: from the start of interval 2
		# to the start of interval 3. This requires at least three intervals on tier 1.
		select TextGrid 'fileName$'
		selectionStart = Get start point: 1, 2
		selectionStop = Get start point: 1, 3
		for i from 1 to pnts
			# Only points strictly inside the analysed stretch are considered.
			if pnt'i'> selectionStart
			if pnt'i'< selectionStop
				tPnt = pnt'i'
				iPrev = i - 1
				iNext = i + 1
				db = db'i'
				# The first and last points have no neighbour on one side;
				# their own value is used instead.
				if i<>1
					dbPrev = db'iPrev'
				else
					dbPrev = db
				endif
				if i<>pnts
					dbNext = db'iNext'
				else
					dbNext = db
				endif
				# A boundary is inserted at this point when either
				#  - the point is below the threshold while a neighbour is above it, or
				#  - the point is above the threshold, lower than the previous value plus
				#    the margin and lower than the next value minus the margin.
				if ((db<thld and dbPrev>thld) or (db<thld and dbNext>thld)) or (db<dbPrev+mrgn and db<dbNext-mrgn and db>thld)
					Insert boundary... 1 tPnt
				endif
			endif
			endif
			endfor

	# TextGrid is saved with the same filename and in the same directory as the sound files.
	# The TextGrid is the selected object at this point.
	minus Sound 'fileName$'
	Write to text file... 'directory$''fileName$'.TextGrid

	# Remove everything except the file list before the next file.
	select all
	minus Strings fileList
	Remove
endfor

#########################################################################################
## This part of the script gives user a hand to make changes to automatically-set pseudo-syllables intervals.
# Make a list of all sound files in the directory.
Create Strings as file list... fileList 'directory$'*.'filetype$'

# Loop through all files.
numberOfFiles = Get number of strings

for i from 1 to numberOfFiles

	# Load the i-th sound file of the list.
	select Strings fileList
	soundFile$ = Get string... i
	Read from file... 'directory$''soundFile$'

	# fileName$ is the name of the Sound object (the file name without extension).
	fileName$ = selected$ ("Sound")

	# Open a matching textgrid file
	Read from file... 'directory$''fileName$'.TextGrid
	select TextGrid 'fileName$'

	# Select sound object together with textgrid and open them in the editor.
	# The script pauses while the editor is open: adjust the pseudo-syllable boundaries,
	# then click CONTINUE to move to the next sound-textgrid pair.
	plus Sound 'fileName$'
	Edit
	pause  Adjust syllable boundaries 

	# TextGrid is saved with the same filename and in the same directory as the sound files.
	# Only the TextGrid may be selected when saving: with the Sound still selected, Praat
	# would write both objects into the .TextGrid file as a Collection.
	select TextGrid 'fileName$'
	Write to text file... 'directory$''fileName$'.TextGrid

	# Remove everything except the file list before the next pair.
	select all
	minus Strings fileList
	Remove
endfor

############################################################################
## This part of the script computes Intensity plateaus and peaks within each pseudo-syllable interval.
# Build the list of sound files to process.
Create Strings as file list... fileList 'directory$'*.'filetype$'
numberOfFiles = Get number of strings

for n from 1 to numberOfFiles

	# Load sound number n of the list together with the TextGrid saved next to it.
	select Strings fileList
	soundFile$ = Get string... n
	Read from file... 'directory$''soundFile$'
	fileName$ = selected$ ("Sound")
	Read from file... 'directory$''fileName$'.TextGrid

	# Intensity contour used for all measurements of this file.
	select Sound 'fileName$'
	intensityISI = To Intensity... 100 0 "yes"

	# Plateaus and peaks are only added to TextGrids that still have their two original tiers;
	# a TextGrid with any other number of tiers is saved back unchanged.
	select TextGrid 'fileName$'
	numberOfTiers = Get number of tiers
	if numberOfTiers = 2

		# New layout: 1 = pseudo-syllables, 2 = plateaus, 3 = peaks, 4 = former tier 2.
		Insert interval tier: 2, "IntensityPlateaus"
		Insert point tier: 3, "IntensityPeaks"

		# The first and the last interval of tier 1 lie outside the analysed stretch.
		numberOfIntervals = Get number of intervals: 1
		lastSyllable = numberOfIntervals - 1

		for iSyl from 2 to lastSyllable

			select TextGrid 'fileName$'
			sylStart = Get start point: 1, iSyl
			sylEnd = Get end point: 1, iSyl

			# Reference level: mean intensity in a window of 10 ms on each side of the
			# intensity maximum, shrunk to stay inside the pseudo-syllable.
			select intensityISI
			peakTime = Get time of maximum... sylStart sylEnd Parabolic
			refFrom = peakTime - 0.01
			if refFrom < sylStart
				refFrom = sylStart + 0.001
			endif
			refTo = peakTime + 0.01
			if refTo > sylEnd
				refTo = sylEnd - 0.001
			endif
			refMean = Get mean: refFrom, refTo, "energy"

			# Grow the plateau to the right in 20 ms chunks. A chunk is accepted while it
			# ends before the syllable end, its mean is within 3 of the reference level
			# and its standard deviation is below 2.
			cursor = refTo
			while cursor < sylEnd
				chunkEnd = cursor + 0.02
				accepted = 0
				if chunkEnd < sylEnd
					chunkMean = Get mean: cursor, chunkEnd, "energy"
					chunkSD = Get standard deviation: cursor, chunkEnd
					if ((abs(chunkMean-refMean)<3) and (chunkSD<2))
						accepted = 1
					endif
				endif
				if accepted
					plateauEnd = chunkEnd
					cursor = cursor + 0.02
				else
					plateauEnd = cursor
					cursor = sylEnd
				endif
			endwhile

			# Grow the plateau to the left with the same acceptance test.
			cursor = refFrom
			while cursor > sylStart
				chunkStart = cursor - 0.02
				accepted = 0
				if chunkStart > sylStart
					chunkMean = Get mean: chunkStart, cursor, "energy"
					chunkSD = Get standard deviation: chunkStart, cursor
					if ((abs(chunkMean-refMean)<3) and (chunkSD<2))
						accepted = 1
					endif
				endif
				if accepted
					plateauStart = chunkStart
					cursor = cursor - 0.02
				else
					plateauStart = cursor
					cursor = sylStart
				endif
			endwhile

			# Mark the plateau on tier 2 and its midpoint on tier 3, and give the plateau,
			# the point and the pseudo-syllable the same running number.
			select TextGrid 'fileName$'
			Insert boundary: 2, plateauStart
			Insert boundary: 2, plateauEnd
			plateauMid = plateauStart + ((plateauEnd - plateauStart)/2)
			sylLabel$ = string$(iSyl-1)
			Insert point: 3, plateauMid, sylLabel$
			plateauInterval = Get low interval at time: 2, plateauMid
			Set interval text: 2, plateauInterval, sylLabel$
			sylInterval = Get low interval at time: 1, plateauMid
			Set interval text: 1, sylInterval, sylLabel$

		endfor

		select intensityISI
		Remove
	endif

	# The TextGrid alone is written back under its original name, next to the sound file.
	select TextGrid 'fileName$'
	Write to text file... 'directory$''fileName$'.TextGrid

	# Empty the object list, keeping only the file list.
	select all
	minus Strings fileList
	Remove

endfor
###################################################################################################################################################################################
## This part of the script performs sound file analysis and extracts the measures.
## For every file it appends to the result file:
##  - one row per labelled interval of tier 1 (pseudo-syllables) and tier 2 (plateaus),
##  - one row per labelled point of tier 3 (peaks), with the distance to the previous point (EIS),
##  - one "Overall" row with speech rate, mean EIS and standard deviation of EIS.
## The TextGrids are expected to have the four tiers produced by the previous step.
# Create results file and the header for the columns
fileappend "'directory$''resultfile$'.txt" Speaker'tab$'Sex'tab$'Language'tab$'FileName'tab$'Module'tab$'Task'tab$'TierName'tab$'Label'tab$'Comment'tab$'Start'tab$'End'tab$'Duration'tab$'EIS'tab$'SpeechRate'tab$'Mean_EIS'tab$'SD_EIS'newline$'

# Make a list of all sound files in the directory.
Create Strings as file list... fileList 'directory$'*.'filetype$'

# Loop through all files.
numberOfFiles = Get number of strings
for n from 1 to numberOfFiles

	# Load the n-th sound file of the list.
	select Strings fileList
	soundFile$ = Get string: n
	Read from file... 'directory$''soundFile$'

	# fileName$ is the name of the Sound object (the file name without extension).
	fileName$ = selected$ ("Sound")

	# Open a matching textgrid file; it stays selected for all queries below.
	Read from file... 'directory$''fileName$'.TextGrid
	select TextGrid 'fileName$'

	# Split the file name on "_" with the procedure @split defined at the end of this file.
	@split ("_", fileName$)

	######################### Settings derived from the file name ###########
	# 8 fields: language_sex_speaker_..._module_task
	# 6 fields: speaker_..._module_task
	# Any other number of fields: the defaults below are kept.
	speaker$ = ""
	sex$ = ""
	language$ = ""
	module$ = "texte"
	task$ = "stimuli"

	if split.length == 8
		speaker$ = split.array$[3]
		sex$ = split.array$[2]
		language$ = split.array$[1]
		module$ = split.array$[7]
		task$ = split.array$[8]
	elsif split.length == 6
		speaker$ = split.array$[1]
		module$ = split.array$[5]
		task$ = split.array$[6]
	endif

	########################################################################

	# The number of intervals on the Comments tier (tier 4) does not change while measuring,
	# so it is queried once per file.
	numberOfIntervalsComments = Get number of intervals: 4

	# Get measures of each labelled interval on tiers 1 and 2.
	for t to 2
		tierName$ = Get tier name: t
		numberOfIntervals = Get number of intervals: t

		for j to numberOfIntervals
			intervalLabel$ = Get label of interval: t, j
			if intervalLabel$ <> ""
				intervalStart = Get starting point: t, j
				intervalEnd = Get end point: t, j
				intervalDuration = intervalEnd - intervalStart

				# Collect the labels of all comment intervals that overlap or touch this interval.
				comment$ = ""
				for c to numberOfIntervalsComments
					intervalCommentLabel$ = Get label of interval: 4, c
					if intervalCommentLabel$ <> ""
						intervalStartComments = Get starting point: 4, c
						intervalEndComments = Get end point: 4, c
						if not (  (intervalEndComments < intervalStart) or (intervalStartComments > intervalEnd)  )
							comment$ = comment$ + " " + intervalCommentLabel$
						endif
					endif
				endfor

				# Write to the result file.
				fileappend "'directory$''resultfile$'.txt" 'speaker$''tab$''sex$''tab$''language$''tab$''fileName$''tab$''module$''tab$''task$''tab$''tierName$''tab$''intervalLabel$''tab$''comment$''tab$''intervalStart:3''tab$''intervalEnd:3''tab$''intervalDuration:3''newline$'
			endif
		endfor
	endfor

	# Get measures on point tier 3. Every EIS value that is written is also stored in
	# eisValue[], so that mean and standard deviation use exactly the reported values.
	tierName$ = Get tier name: 3
	numberOfPoints = Get number of points: 3

	eisCount = 0
	eisSum = 0
	for j to numberOfPoints
		pointLabel$ = Get label of point: 3, j
		if pointLabel$ <> ""
			pointTime = Get time of point: 3, j
			if j > 1
				# EIS: distance to the previous point of the tier.
				previousTime = Get time of point: 3, j-1
				eis = pointTime - previousTime
				eisCount = eisCount + 1
				eisValue[eisCount] = eis
				eisSum = eisSum + eis
				# Write to the result file.
				fileappend "'directory$''resultfile$'.txt" 'speaker$''tab$''sex$''tab$''language$''tab$''fileName$''tab$''module$''tab$''task$''tab$''tierName$''tab$''pointLabel$''tab$''tab$''pointTime:3''tab$''tab$''tab$''eis:3''newline$'
			else
				# The first point has no predecessor, hence no EIS.
				fileappend "'directory$''resultfile$'.txt" 'speaker$''tab$''sex$''tab$''language$''tab$''fileName$''tab$''module$''tab$''task$''tab$''tierName$''tab$''pointLabel$''tab$''tab$''pointTime:3''newline$'
			endif
		endif
	endfor

	# Speech rate: number of tier-1 intervals between the two outer intervals, divided by
	# the time from the end of the first interval to the start of the last one.
	numberOf1Intervals = Get number of intervals: 1
	startDuration = Get end point: 1, 1
	stopDuration = Get starting point: 1, numberOf1Intervals
	numberOf1Intervals = numberOf1Intervals - 2
	totalDuration = stopDuration - startDuration
	speechRate = numberOf1Intervals/totalDuration

	# Mean and sample standard deviation of the EIS values. They stay undefined when
	# there are too few values (mean needs one, standard deviation needs two).
	meanEIS = undefined
	sdEIS = undefined
	if eisCount > 0
		meanEIS = eisSum / eisCount
	endif
	if eisCount > 1
		sumSquaredDeviations = 0
		for e to eisCount
			sumSquaredDeviations = sumSquaredDeviations + (eisValue[e] - meanEIS)^2
		endfor
		sdEIS = sqrt (sumSquaredDeviations / (eisCount - 1))
	endif

	# Write the overall row; the last three columns are SpeechRate, Mean_EIS and SD_EIS.
	fileappend "'directory$''resultfile$'.txt" 'speaker$''tab$''sex$''tab$''language$''tab$''fileName$''tab$''module$''tab$''task$''tab$'Overall'tab$'Overall'tab$''tab$''tab$''tab$''tab$''tab$''speechRate:3''tab$''meanEIS:3''tab$''sdEIS:3''newline$'

	# Free the Sound and TextGrid of this file so that objects do not pile up
	# over a large directory.
	select all
	minus Strings fileList
	Remove

endfor

# Clean up the Praat objects window.
select all
Remove

###################################################################
# Split .str$ on .sep$ and store each found element in .array$ and
# length of .array$ in .length
#
# Usage:
#  include /path/to/this/script.praat
#  [code]
#  @split (SEPARATOR, STRING)
#  for i to split.length
#    str$[i] = split.array$[i]
#  endfor
#
# where SEPARATOR is a separator string and STRING is a string to
# separate.
#
# If string$ = "hello world", then after
# @split (" ", string$)
# split.array$[1] contains "hello" and split.array$[2] contains "world"
#
# Notes:
# - Since .length stores the number of items separated by a string, it is always
#   larger than the number of occurrences of that string by one, which means
#   it can be used to count occurrences as well.
# - This script has been changed to use the new Praat syntax, which
#   started being introduced after v.5.3.44. It can be made to work with
#   the old syntax by replacing the definition to
#
#       procedure split .sep$ .str$
#
#   and, with the same example as above, calling it with
#
#       call split " " 'string$'
#
# Written by Jose J. Atria (28 February 2012)
# Last updated: 20 February 2014
# This script is free software: you can redistribute it and/or modify 
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# A copy of the GNU General Public License is available at
# <http://www.gnu.org/licenses/>.

procedure split (.sep$, .str$)
  # Cuts .str$ at every occurrence of .sep$.
  # Results: split.array$[1 .. split.length] hold the pieces in order of appearance;
  # split.length is the number of pieces (at least 1, even for an empty string).
  .seplen = length(.sep$)
  .length = 0

  # While a separator is left, store the text in front of it and continue
  # with the remainder that follows the separator.
  .sep = index(.str$, .sep$)
  while .sep > 0
    .length = .length+1
    .array$[.length] = left$(.str$, .sep-1)
    .str$ = mid$(.str$, .sep+.seplen, length(.str$))
    .sep = index(.str$, .sep$)
  endwhile

  # What remains after the last separator is the final piece.
  .length = .length+1
  .array$[.length] = .str$
endproc
