import requests
import math
import json

"""
This script extracts official country names based on the UNTERM database.

The United Nations Terminology Database (UNTERM) is a multilingual database of official terminology relevant to the work of the United Nations.
Official terminology is provided in the six official languages of the United Nations — Arabic, Chinese, English, French, Spanish and Russian.
"""

# Search endpoint. The page index is appended to this URL before each request.
GET_RECORDS_ID_URL = "https://conferences.unite.un.org/untermapi/api/term/search?query=*&page="
# Record endpoint. A recordID is appended to this URL to fetch one record.
GET_INFO_COUNTRY_URL = "https://conferences.unite.un.org/untermapi/api/record/"

# Output written with json.dump defaults (non-ASCII characters are escaped).
OUTPUT_FILE_NAME = "un_formal_country_names.json"
# Output written with ensure_ascii=False (non-ASCII characters kept as-is).
OUTPUT_FILE_NAME_SMALL = "un_formal_country_names_min.json"

# Headers sent with every search request.
REQUEST_HEADERS = {
    'content-type': 'application/json',
    'accept': 'application/json, text/plain'
}
# Body of every search request; it is serialised to JSON before being sent.
# NOTE(review): the meaning of each field is defined by the UNTERM API, not by
# this file — presumably "subjects" restricts the search to country names.
REQUEST_DATA = {
    "searchTerm": "*",
    "searchType": 0,
    "searchLanguages": ["ar", "es", "en", "fr", "ru", "zh"],
    "languagesDisplay":  ["en", "fr"],
    "datasets":  ["UNHQ"],
    "bodies": [],
    "subjects": ["Country names"],
    "recordTypes": [],
    "acronymSearch": True,
    "localDBSearch": True,
    "termTitleSearch": True,
    "phraseologySearch": False,
    "footnoteSearch": False,
    "fullTextSearch": False,
    "facetedSearch": True,
    "buildSubjectList": False
}

# Maps the 'languageId' found on a record's special fields to the language key
# used in the generated country dictionaries.
languagesIDToLanguage = {
    '5a504ded-6ba9-49c9-98aa-c8418cbb1ce9': 'arabic',
    '676ccfad-a7b3-4e36-8d49-b21a9b5e0d7d': 'chinese',
    '77ebac02-6a97-4722-859a-de5896a4d34e': 'english',
    'ca7853cf-0f94-4aa6-be0a-ea586361c19e': 'french',
    '496bcad8-7cd0-4f46-85c7-23b91e7a4ad2': 'russian',
    'af320177-7bdb-40df-9521-6d2e0478f129': 'spanish',
}


def main():
    """
    Download every country record and write the result to both output files.

    Each record returned by getRecordsID() is expanded with getInfoCountry()
    and stored under its ISO alpha-2 code. The mapping is then sorted by that
    code and dumped twice: once with the json defaults (non-ASCII characters
    escaped) and once with ensure_ascii=False.
    """
    countries = {}

    for recordID in getRecordsID():
        infoLang = getInfoCountry(recordID)
        if 'iso_a2' not in infoLang:
            # There is no key to store this record under; skip it rather than
            # crash and lose everything that was already downloaded.
            print(f"Skipping record {recordID}: no ISO alpha-2 code")
            continue
        countries[infoLang['iso_a2']] = infoLang

    # Sort the dictionary to get the same result every time.
    countries = {code: countries[code] for code in sorted(countries)}

    # The encoding is explicit so the output does not depend on the platform's
    # default encoding.
    with open(OUTPUT_FILE_NAME, 'w', encoding='utf-8') as f:
        json.dump(countries, f)

    # ensure_ascii=False writes raw Arabic, Chinese and Russian characters, so
    # this file must be opened as UTF-8.
    with open(OUTPUT_FILE_NAME_SMALL, 'w', encoding='utf-8') as f:
        json.dump(countries, f, ensure_ascii=False)

def getRecordsID():
    """
    Collect the recordID of every search result, across all result pages.

    Page 0 is requested first; its 'totalHits' and 'pageSize' fields determine
    how many further pages are requested.

    NOTE(review): the page arithmetic assumes the API numbers pages from 0,
    as suggested by the first request — confirm against the API.

    Returns:
        list: the recordID values, in the order the API returned them.

    Raises:
        SystemExit: with code 1 if the first request fails, 3 if its body has
            no 'results', 5 if a later page fails, and 4 if a later page body
            has no 'results'.
    """
    def getRecordsIDFromResponse(resp):
        """
        Return the recordID of every result in one search response.
        Results without a 'recordID' key are ignored.
        """
        data = resp.json()
        if 'results' not in data:
            print("Missing results in request")
            raise SystemExit(4)

        return [r['recordID'] for r in data['results'] if 'recordID' in r]

    # The request body is identical for every page, so serialise it once.
    payload = json.dumps(REQUEST_DATA)

    firstRequest = requests.post(GET_RECORDS_ID_URL + "0", data=payload, headers=REQUEST_HEADERS)
    if firstRequest.status_code != 200:
        print("The first request failed. Check the URL")
        print(firstRequest.status_code)
        print(firstRequest.text)
        raise SystemExit(1)

    data = firstRequest.json()

    if 'results' not in data:
        print("Missing results")
        raise SystemExit(3)

    recordsID = getRecordsIDFromResponse(firstRequest)

    # Total number of pages, including page 0 which was already fetched.
    # Rounding up the quotient (rather than the hit count) avoids requesting
    # a page past the end when totalHits is an exact multiple of pageSize.
    pageSize = int(data['pageSize'])
    nbPage = math.ceil(data['totalHits'] / pageSize) if pageSize > 0 else 1

    for page in range(1, nbPage):
        print(page)
        res = requests.post(GET_RECORDS_ID_URL + str(page), data=payload, headers=REQUEST_HEADERS)
        if res.status_code != 200:
            print("Unable to get a page that should be accessible")
            raise SystemExit(5)

        recordsID.extend(getRecordsIDFromResponse(res))

    return recordsID

def getInfoCountry(recordID):
    """
    Fetch one record and extract the country information it holds.

    For every language listed in the record, the result has an entry keyed by
    the lower-cased language name with:
      - one key per term type found in the record ('short' and 'full' are
        always present, defaulting to an empty string),
      - 'map_usable': the short name cut at its first '(' if it has one,
      - any per-language special fields (capital city, currency, ...).
    Country-wide special fields ('iso_a2', 'iso_a3', 'un_entry_date') are
    stored at the top level of the result.

    Args:
        recordID: identifier appended to GET_INFO_COUNTRY_URL.

    Returns:
        dict: the extracted information, as described above.

    Raises:
        SystemExit: with code 2 if the request does not return HTTP 200.
    """
    def removeExtraInfo(countryName):
        """Cut the name at its first '(' and strip the surrounding whitespace."""
        if '(' in countryName:
            return countryName[:countryName.index('(')].strip()
        return countryName

    # Special fields that describe the country as a whole -> output key.
    globalFields = {
        "ISO Country alpha-2-code": 'iso_a2',
        "ISO Country alpha-3-code": 'iso_a3',
        'Date of Entry in UN': 'un_entry_date',
    }
    # Special fields that exist once per language -> output key.
    perLanguageFields = {
        'Monetary Unit': 'monetary_unit',
        'Currency Designation': 'currency_designation',
        'Currency Symbol': 'currency_symbol',
        'Fractional Unit': 'fractional_unit',
        'Capital City': 'capital_city',
        'Adjective': 'adjective',
    }

    res = requests.get(GET_INFO_COUNTRY_URL + recordID)
    if res.status_code != 200:
        print(f"Error {res.status_code} requesting {recordID}")
        raise SystemExit(2)

    data = res.json()
    countryInfo = {}

    for language in sorted(data['languages']):
        lang = language.lower()
        names = {'short': '', 'full': ''}
        countryInfo[lang] = names

        for term in data[lang]['terms']:
            names[term['termType']] = term['term']

        # The UK doesn't have official short name in russian. ¯\_(ツ)_/¯
        if not names['short'] and names['full']:
            names['short'] = names['full']

        if not names['short']:
            # Neither a short nor a full name was provided for this language.
            print(f"Missing name for this country in {lang}", countryInfo)

        names['map_usable'] = removeExtraInfo(names['short'])

    # Sorting by field name makes the key insertion order deterministic.
    for field in sorted(data['specialFields'], key=lambda item: item['name']):
        name = field['name']
        if name in globalFields:
            countryInfo[globalFields[name]] = field['value']
        elif name in perLanguageFields:
            lang = languagesIDToLanguage.get(field['languageId'])
            if lang is not None:
                # setdefault: the field may target a language that is not
                # listed in data['languages'].
                countryInfo.setdefault(lang, {})[perLanguageFields[name]] = field['value']

    return countryInfo


# Run the extraction only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()