# encoding=utf-8
"""
Tools to manage database and file migrations between different MonPaGe versions.

This module provides utilities for:
- Renaming and restructuring participant files
- Managing database schema changes
- Verifying database integrity
- Merging databases from different sources
"""

__author__ = "Roland Trouville"
__copyright__ = "Copyright 2015+, Consortium MonPaGe"
__license__ = "Creative Commons 4.0 By-Nc-Sa"
__maintainer__ = "Roland Trouville"
__email__ = "contact.monpage@gmail.com"
__status__ = "Production"

import os
import sqlite3
from os.path import join
from typing import Any, Callable, Dict, List, Tuple

from tools.csv_manager import CSVManager
from tools.db_manager import DBManager
from tools.general_tools import GeneralTools
from tools.options import Options


class MigrationTools:
	"""
	A utility class for handling data migration and renaming operations in a participant-based data structure.

	This class provides static methods to:

	- Rename specific sets of files (e.g., Laurie-Question-2, pseudowords, session-tagged files)
	- Perform file name corrections or updates to follow new naming conventions
	- Migrate legacy participant data structures to a new format
	- Verify the integrity and completeness of participant result databases
	- Generate standardized file paths from filenames
	- Merge multiple participant result databases into consolidated files
	"""

	@staticmethod
	def __rename_in_participant(
		participant_code: str, rename_function: Callable[[str], bool]
	) -> bool:
		"""
		Apply a rename function to files within a participant's directory.

		This is a helper method used by specific renaming methods to traverse
		a participant's directory and apply file renaming operations.

		Args:
			participant_code (str): The unique identifier for the participant
			rename_function (Callable[[str], bool]): A function that takes a path and returns
				True if any files were renamed, False otherwise

		Returns:
			bool: True if any files were renamed, False otherwise
		"""
		directory_path = join("data", "participant", participant_code)
		if not os.path.isdir(directory_path):
			return False

		renamed_any = False

		for path in GeneralTools.get_file_tree(directory_path, 2):
			if rename_function(path):
				renamed_any = True

		return renamed_any

	@staticmethod
	def rename_laurie_question_2_v2(path: str) -> bool:
		"""
		Rename Laurie-Question-2 audio files to a consistent format.

		Searches through a directory for files containing "Laurie-Question-2"
		and standardizes their filename format.

		Args:
			path (str): Path to the directory containing files to rename

		Returns:
			bool: True if any files were renamed, False otherwise
		"""
		if not os.path.isdir(path):
			return False

		renamed_any = False

		for filename in os.listdir(path):
			if "Laurie-Question-2" in filename:
				tmp = filename.split("_")
				tmp[-1] = "Laurie-Question-2.wav"
				new_filename = "_".join(tmp)
				os.rename(
					join(path, filename),
					join(path, new_filename),
				)

				renamed_any = True

		return renamed_any

	@staticmethod
	def rename_laurie_question_2(participant_code: str) -> bool:
		"""
		Rename all Laurie-Question-2 files for a specific participant.

		Args:
			participant_code (str): The unique identifier for the participant

		Returns:
			bool: True if any files were renamed, False otherwise
		"""
		return MigrationTools.__rename_in_participant(
			participant_code, MigrationTools.rename_laurie_question_2_v2
		)

	@staticmethod
	def rename_xdiado_v2(path: str) -> bool:
		"""
		Remove 'xdiado' component from filenames in a directory.

		Searches for files containing "xdiado" in the filename, specifically
		as the second-to-last component, and removes it from the filename.

		Args:
			path (str): Path to the directory containing files to rename

		Returns:
			bool: True if any files were renamed, False otherwise
		"""
		if not os.path.isdir(path):
			return False

		renamed_any = False

		for filename in os.listdir(path):
			if "xdiado" in filename:
				tmp = filename.split("_")
				if tmp[-2] == "xdiado":
					del tmp[-2]
					new_filename = "_".join(tmp)
					os.rename(
						join(path, filename),
						join(path, new_filename),
					)
					renamed_any = True

		return renamed_any

	@staticmethod
	def rename_xdiado(participant_code: str) -> bool:
		"""
		Remove 'xdiado' component from all filenames for a specific participant.

		Args:
			participant_code (str): The unique identifier for the participant

		Returns:
			bool: True if any files were renamed, False otherwise
		"""
		return MigrationTools.__rename_in_participant(
			participant_code, MigrationTools.rename_xdiado_v2
		)

	@staticmethod
	def rename_pseudomots_v2(path: str) -> bool:
		"""
		Fix pseudowords filenames by removing unwanted spaces.

		Searches for specific pseudoword filenames that contain spaces
		before the file extension and removes those spaces.

		Args:
			path (str): Path to the directory containing files to rename

		Returns:
			bool: True if any files were renamed, False otherwise
		"""
		if not os.path.isdir(path):
			return False

		renamed_any = False

		for filename in os.listdir(path):
			tmp = filename.split("_")

			# Remove spaces before the file extension in specific pseudoword files
			if tmp[-1] in (
				"g-menabainban .wav",
				"g-oui-oui-oui .wav",
				"g-takadacha .wav",
				"g-fichoussu .wav",
			):
				tmp[-1] = tmp[-1].replace(" ", "")
				new_filename = "_".join(tmp)
				os.rename(
					join(path, filename),
					join(path, new_filename),
				)
				renamed_any = True

		return renamed_any

	@staticmethod
	def rename_pseudomots(participant_code: str) -> bool:
		"""
		Fix pseudowords filenames for a specific participant by removing unwanted spaces.

		Args:
			participant_code (str): The unique identifier for the participant

		Returns:
			bool: True if any files were renamed, False otherwise
		"""
		return MigrationTools.__rename_in_participant(
			participant_code, MigrationTools.rename_pseudomots_v2
		)

	@staticmethod
	def rename_session_files_v2(path: str) -> bool:
		"""
		Add session number to filenames in a multi-session directory.

		For directories containing "_sess" in their name (indicating a multi-session folder),
		this method ensures that all files have the proper session number prefix ("S"+number)
		in the correct position of the filename.

		Args:
			path (str): Path to the directory containing files to rename

		Returns:
			bool: True if any files were renamed, False otherwise
		"""
		if not os.path.isdir(path):
			return False

		dirname_lvl2 = os.path.normpath(path).split(os.sep)[-1]

		if "_sess" not in dirname_lvl2:
			return False

		renamed_any = False

		# We have a multisession folder
		for filename in os.listdir(path):
			tmp = filename.split("_")

			if not tmp[-3].startswith("S"):
				tmp_sn = dirname_lvl2.split("_sess")
				session_number = tmp_sn[1]
				tmp2 = tmp[:-2] + ["S" + session_number] + tmp[-2:]
				new_filename = "_".join(tmp2)
				os.rename(
					join(path, filename),
					join(path, new_filename),
				)

				renamed_any = True

		return renamed_any

	@staticmethod
	def rename_session_files(participant_code: str) -> bool:
		"""
		Update all session files for a specific participant to include session numbers.

		Args:
			participant_code (str): The unique identifier for the participant

		Returns:
			bool: True if any files were renamed, False otherwise
		"""
		return MigrationTools.__rename_in_participant(
			participant_code, MigrationTools.rename_session_files_v2
		)

	@staticmethod
	def change_participant_structure() -> bool:
		"""
		Migrate participant data to a new directory structure.

		Reads ``data/participant/participants.csv`` (tab-separated) and, for each
		row whose first field names an existing participant folder, writes the
		first three fields of the row, one per line, to ``data.csv`` inside that
		folder. When every row could be migrated, the original file is renamed
		to ``participants.back.csv``.

		Rows without any content are ignored. A row with fewer than three
		fields, or whose participant folder does not exist, makes the migration
		fail; the remaining rows are still processed and the original file is
		kept in place.

		Returns:
			bool: True if structure was correctly changed, False otherwise
		"""

		participants_folder = join("data", "participant")
		participants_csv = join(participants_folder, "participants.csv")

		ok = True
		participants: list[list[str]] = CSVManager.read_file(participants_csv, "\t")

		for participant in participants:
			# Blank lines carry no participant: nothing to migrate.
			if not any(participant):
				continue

			# The three fields written below must all be present.
			if len(participant) < 3:
				ok = False
				continue

			# The folder and the code are two path components: they must be
			# joined, not concatenated ("data/participantCODE" never exists).
			participant_dir = join(participants_folder, participant[0])
			if not os.path.isdir(participant_dir):
				ok = False
				continue

			to_write = [
				[participant[0]],
				[participant[1]],
				[participant[2]],
			]

			CSVManager.overwrite_lines(
				join(participant_dir, "data.csv"),
				"\t",
				to_write,
			)

		if ok:
			os.rename(
				participants_csv,
				join(participants_folder, "participants.back.csv"),
			)

		return ok

	@staticmethod
	def verify_participant_result_db(participant_code: str) -> List[str]:
		"""
		Check a participant's result database and summarise its content.

		The database is expected at
		``./data/participant/<participant_code>/cotation_result_<participant_code>.db``.
		When that file is missing, the returned list holds a single error message
		and nothing else is done.

		Otherwise, for each of the tables ``results_pw``, ``results_qa`` and
		``results_int``:

		- the column names of the table are compared with the expected ones;
		- a missing ``error_verbatim`` column in ``results_pw`` is added with an
		  ``ALTER TABLE`` statement and is not reported;
		- any other missing column is reported in the returned list;
		- if an ``sqlite3.OperationalError`` is raised while doing so, the creation
		  script returned by
		  ``DBManager.get_sql_creation_script_for_predefined_tables`` is executed
		  for that table.

		Finally, the number of results is counted per session and judge for each
		module. The list of modules is longer when the RESEARCH option is enabled.

		All messages appended to the returned list are written in French.

		Args:
			participant_code (str): The unique identifier for the participant

		Returns:
			List[str]: Error messages and per-module statistics, in the order
			they were produced
		"""
		# Messages accumulated for the caller
		ret = []

		db_filepath = "./data/participant/" + participant_code + "/"
		if not os.path.isfile(
			db_filepath + "cotation_result_" + participant_code + ".db"
		):
			# No database file: report it and skip every other check
			ret.append(
				"Fichier cotation_result_"
				+ participant_code
				+ ".db introuvable dans /data/participant/"
				+ participant_code
				+ "/"
			)
		else:
			with DBManager(
				db_filepath + "cotation_result_" + participant_code + ".db"
			) as participant_result_db:
				# Expected column names for each result table
				table_list: Dict[str, List[str]] = {
					"results_pw": [
						"pseudo_word_id",
						"syll_pos",
						"phon_pos",
						"session_path",
						"participant",
						"judge",
						"is_cluster",
						"state",
						"effort",
						"inversion",
						"ajout",
						"error_type",
						"error_nature",
						"error_verbatim",
					],
					"results_qa": [
						"question_id",
						"stimuli_id",
						"value",
						"session_path",
						"participant",
						"judge",
					],
					"results_int": [
						"word",
						"score",
						"session_path",
						"participant",
						"judge",
					],
				}
				for table in table_list:
					try:
						# A single row is enough: only the column names exposed by
						# res.description are used below
						res = participant_result_db.execute(
							f"SELECT * FROM {table} LIMIT 1"
						)
						found_field: List[str] = []
						for desc in res.description:
							found_field.append(desc[0])
						for fieldname in table_list[table]:
							if fieldname not in found_field:
								# A missing column is an error, unless it is the one
								# column that is added on the fly just below
								ok = False
								if (
									fieldname == "error_verbatim"
									and table == "results_pw"
								):
									ok = True
									participant_result_db.execute(
										"ALTER TABLE `results_pw` ADD COLUMN `error_verbatim` TEXT DEFAULT NULL"
									)
								if not ok:
									ret.append(
										"Impossible de trouver le champ "
										+ fieldname
										+ " dans la table "
										+ table
									)
					except sqlite3.OperationalError:
						# NOTE(review): presumably raised by the SELECT when the table
						# does not exist; an OperationalError coming from the ALTER TABLE
						# above would also end up here - confirm
						participant_result_db.execute(
							DBManager.get_sql_creation_script_for_predefined_tables(
								table
							)
						)

				# Each entry is [module name, table queried for that module]
				if Options.is_enabled(Options.Option.RESEARCH):
					modules: List[List[str]] = [
						["Intelligibilite", "results_int"],
						["PneumoPhonatoire", "results_qa"],
						["PseudoMots", "results_pw"],
						["Diadoco", "results_qa"],
						["Phrases", "results_qa"],
						["Texte", "results_qa"],
						["Acoustique", "step_data"],
					]
				else:
					modules: List[List[str]] = [
						["Intelligibilite", "results_int"],
						["PseudoMots", "results_pw"],
						["Acoustique", "step_data"],
					]

				for module in modules:
					sql: str = ""

					# Beginning of the counting query; what is counted depends on the table
					if module[1] == "results_pw":
						sql = "SELECT session_path, COUNT(DISTINCT pseudo_word_id) AS nb, judge FROM results_pw "
					elif module[1] == "results_qa":
						sql = "SELECT session_path, COUNT(DISTINCT stimuli_id) AS nb, judge FROM results_qa "
					elif module[1] == "results_int":
						sql = "SELECT session_path, COUNT(*) AS Nb, judge FROM results_int "

					if module[1] == "step_data":
						# step_data is filtered on the participant only; its session_date
						# column stands in for session_path
						sql = """
						SELECT session_date AS session_path, COUNT(*) AS nb, judge
						FROM step_data
						WHERE participant = ?
						GROUP BY session_path, judge
						"""
						res = participant_result_db.execute(sql, (participant_code,))
					else:
						# The same table can serve several modules: keep only the rows
						# whose session_path contains the module name
						sql += "WHERE participant = ? AND session_path LIKE '%' || ? || '%' GROUP BY session_path, judge"
						res = participant_result_db.execute(
							sql, (participant_code, module[0])
						)

					# Each row is (session, number of results, judge)
					datas = res.fetchall()
					if datas is not None and len(datas) > 0:
						for data in datas:
							ret.append(
								str(module[0])
								+ ": Session "
								+ str(data[0])
								+ " - Juge "
								+ str(data[2])
								+ " - "
								+ str(data[1])
								+ " resultat(s)"
							)
					else:
						ret.append(str(module[0]) + ": Aucun résultat trouvé")

		return ret

	@staticmethod
	def get_filepath_for_file(file: str, participant_last_index: int = 0) -> str:
		"""
		Generate a standardized path for a file based on its filename components.

		Parses a filename (e.g., FR_F_CB1_2015_12_19_ModulePseudoMots_e-laspa) into its
		participant, date, and module components to create a standardized path structure.

		Args:
			file (str): The filename or path to parse
			participant_last_index (int, optional): The index position of the last\
			component of the participant code in the split filename. Defaults to 0.

		Returns:
			str: A standardized path in the format "participant/date/module"
		"""
		# FR_F_CB1_2015_12_19_ModulePseudoMots_e-laspa
		fname = file
		tmp = file.split("/")
		if len(tmp) > 0:
			fname = tmp[-1]

		compo = fname.split("_")
		participant = "_".join(compo[0 : participant_last_index + 1])
		date = "_".join(compo[participant_last_index + 1 : participant_last_index + 4])
		module = compo[participant_last_index + 4]

		return participant + "/" + date + "/" + module

	@staticmethod
	def merge_all_cotation_dbs(source_dir: str, pbar: Any) -> bool:
		"""
		Merge multiple cotation databases into a single consolidated database.

		This method:
		1. Searches for all .db files in the source directory
		2. Creates a merged database in the results_merging subdirectory
		3. Processes each database and merges its contents
		4. Creates individual participant result databases from the merged data

		Args:
			source_dir (str): The directory containing the databases to merge
			pbar: A progress bar object that implements setMaximum, setValue, and value\
			methods for tracking the merge progress

		Returns:
			bool: True if the merge completed successfully, False otherwise
		"""
		pathfile = source_dir + "/all_cotation_results.db"
		if os.path.isfile(pathfile):
			os.remove(pathfile)

		target_dir = source_dir + "/results_merging/"
		if os.path.exists(target_dir):
			for f in os.listdir(target_dir):
				os.remove(f)

		else:
			os.mkdir(target_dir)

		if not os.path.isfile("./data/cotation/cotation.db"):
			return False

		with DBManager.get_cotation_db() as cotation_db:
			files = GeneralTools.find_files(source_dir, ".db")

			pbar.setMaximum(sum(1 for _ in files))  # len(files)
			pbar.setValue(0)
			target_result_db = cotation_db.create_user_cotation_db(
				target_dir + "all_cotation_results.db"
			)
			for f in files:
				tmp = f.split("/")
				path = "/".join(tmp[:-1]) + "/"
				tmp2 = tmp[-1].split(".")
				fname = ".".join(tmp2[:-1])
				temp = DBManager(path + fname + ".db")
				DBManager.__fuse_results_in__(temp, target_result_db)
				pbar.setValue(pbar.value() + 1)
			# QApplication.processEvents()  # Processing the other application events to allow for interrupting

			participants: Dict[str, int] = {}
			for tname in DBManager.RESULT_TABLES_TO_CREATE:
				vals = target_result_db.execute(
					f"SELECT DISTINCT participant FROM {tname}"
				).fetchall()
				for v in vals:
					participants[v[0]] = 1

			pbar.setMaximum(len(participants) * len(DBManager.RESULT_TABLES_TO_CREATE))
			pbar.setValue(0)
			for p in participants.keys():
				p_t_dir = target_dir + p + "/"
				if not os.path.exists(p_t_dir):
					os.mkdir(p_t_dir)
				tmp_db = cotation_db.create_user_cotation_db(
					p_t_dir + "cotation_result_" + p + ".db"
				)

				for tname in DBManager.RESULT_TABLES_TO_CREATE:
					tmp = target_result_db.execute(
						"SELECT name FROM sqlite_master WHERE type='table' AND name=?",
						(tname,),
					).fetchall()
					if len(tmp) != 0:
						c = target_result_db.execute(
							f"SELECT * FROM {tname} WHERE participant = ?", (p,)
						)
						cols: List[str] = [
							description[0] for description in c.description
						]
						values: List[Tuple[Any, ...]] = c.fetchall()
						sql: str = (
							DBManager.generate_table_insertion_query_with_bindings_sql(
								cols, tname
							)
						)

						if len(values) > 0:
							for v in values:
								tmp_db.execute(sql, v)

					tmp_db.commit()
					pbar.setValue(pbar.value() + 1)
			# QApplication.processEvents()  # Processing the other application events to allow for interrupting

		return True
