from typing import List

from mm_stats.definitions import DATA_PATH, logger
import pandas as pd
from mm_stats.auth import PostgresDb
from psycopg2.extensions import AsIs


def get_all_project_ids() -> list:
    """Get project ids from postgres table."""
    sql = """
        SELECT
          p.project_id
        FROM
          data_preparation.projects p
        """

    db = PostgresDb()
    project_ids_raw = db.retr_query(sql)
    # use set() because there are duplicated project ids in tasking manager
    project_ids = list(set([row[0] for row in project_ids_raw]))

    return project_ids


def get_project_ids_by_campaign(campaign_name: str) -> list:
    """Get project ids from postgres table from campaign."""
    campaign_name = f"%%{campaign_name}%%"
    sql = """
        SELECT
          p.project_id
        FROM
          data_preparation.campaigns c
          ,data_preparation.campaign_projects p
        WHERE
          c.name like %(campaign_name)s
          AND
          c.id = p.campaign_id
    """

    db = PostgresDb()
    project_ids_raw = db.retr_query(sql, {"campaign_name": campaign_name})
    # use set() because there are duplicated project ids in tasking manager
    project_ids = list(set([row[0] for row in project_ids_raw]))

    return project_ids


def get_project_ids_by_organisation(organisation_name: str) -> list:
    """Get all project ids for a specific organisation."""
    organisation_name = f"%%{organisation_name}%%"
    sql = """
        SELECT
          p.project_id
        FROM
          data_preparation.organisations o
          ,data_preparation.projects p
        WHERE
          o.name like %(organisation_name)s
          AND
          o.id = p.organisation_id
    """

    db = PostgresDb()
    project_ids_raw = db.retr_query(sql, {"organisation_name": organisation_name})
    # use set() because there are duplicated project ids in tasking manager
    project_ids = list(set([row[0] for row in project_ids_raw]))

    return project_ids


def get_project_ids_by_project_info(keyword: str) -> list:
    """Get project id by filtering project name, description, task instructions."""
    keyword = f"%%{keyword}%%"
    sql = """
            SELECT
              p.project_id
            FROM
              data_preparation.project_info p
            WHERE
              p.name like %(keyword)s
              OR
              p.short_description like %(keyword)s
              OR
              p.description like %(keyword)s
              OR
              p.instructions like %(keyword)s
              OR
              p.per_task_instructions like %(keyword)s
        """

    db = PostgresDb()
    project_ids_raw = db.retr_query(sql, {"keyword": keyword})
    # use set() because there are duplicated project ids in tasking manager
    project_ids = list(set([row[0] for row in project_ids_raw]))

    return project_ids


def get_project_ids_from_excel_column(file_name: str, column: str) -> list:
    """Filter by an excel project_id column."""
    df = pd.read_excel(DATA_PATH + file_name)
    project_ids = list(set(df[column].to_list()))
    return project_ids


def get_project_ids_by_keywords(keywords: list) -> list:
    """Get project ids for organisations and project_info matching keyword."""
    project_ids: List = []
    for keyword in keywords:

        project_ids_campaign = get_project_ids_by_campaign(campaign_name=keyword)
        project_ids_organisation = get_project_ids_by_organisation(
            organisation_name=keyword
        )
        project_ids_project_info = get_project_ids_by_project_info(keyword=keyword)

        # create union of all project ids
        project_ids = list(
            set().union(
                project_ids,
                project_ids_campaign,
                project_ids_organisation,
                project_ids_project_info,
            )
        )
    return project_ids


def get_project_ids_by_countries(countries: list) -> list:
    """Get project ids for all projects within a list of countries."""
    sql = """
        SELECT
          p.project_id
        FROM
          data_preparation.projects p
        WHERE
          -- this looks weird, but we do it to extract the country name
          replace(
            substr(
                p.country::text , 2, length(p.country::text) - 2), '"', ''
            ) = ANY( %(countries)s )
    """

    db = PostgresDb()
    project_ids_raw = db.retr_query(sql, {"countries": countries})
    # use set() because there are duplicated project ids in tasking manager
    project_ids = list(set([row[0] for row in project_ids_raw]))

    return project_ids


def create_filter_by_project_ids(filter_name: str, project_ids: list) -> None:
    """Create Filter Table using project ids."""
    sql = """
        CREATE SCHEMA IF NOT EXISTS "%(filter_name)s";
        DROP TABLE IF EXISTS "%(filter_name)s".filter;
        CREATE TABLE "%(filter_name)s".filter AS
        SELECT
          project_id
        FROM
          data_preparation.projects
        WHERE
          project_id = ANY (ARRAY%(project_ids)s)
        GROUP BY
          project_id
    """

    db = PostgresDb()
    db.query(sql, {"filter_name": AsIs(filter_name), "project_ids": AsIs(project_ids)})
    logger.info(f"created table {filter_name}.filter")