I had some difficulties with the previous post because I did not get precise coordinates for mapping, so I finally tried this alternative code, which gave me better results, and I made final maps for each year from the dispatch file.

Please find below a detailed explanation with code.

Script 1:

import re, os


source = "C:/Users/Tatjana Smiljnic/Desktop/univie-tnt-2019.github.io/Lesson_10/tgn_xml_0619/" # folder with the TGN XML dumps
oldPath = "C:/Users/Tatjana Smiljnic/Desktop/univie-tnt-2019.github.io/Lesson07/wget-activehistory/" # folder with the downloaded dispatch files
newPath = "C:/Users/Tatjana Smiljnic/Desktop/Lesson_10_new/" # target folder for results (not used below)

# cache the dispatch directory listing once; generate() reads this module-level list
lof = os.listdir(oldPath)

def generate(filter):  # NOTE: parameter name kept for compatibility, although it shadows the builtin `filter`
    """Count TGN toponym references in all dispatch files of one year.

    Reads every "dltext*" file in the module-level `oldPath` listing `lof`,
    keeps only dispatches whose <date value="..."> starts with `filter`
    (a year string such as "1861"), counts how often each TGN number occurs,
    and writes the counts to "dispatch_toponyms_<year>.tsv" with the columns
    freq, tgn, timestamp.
    """
    topCountDictionary = {}  # tgn number -> frequency

    for fileName in lof:  # loop through all downloaded dispatch files
        if not fileName.startswith("dltext"):  # fileName test
            continue
        with open(oldPath + fileName, "r", encoding="utf8") as fh:
            text = fh.read()
            # unescape XML ampersands (the original `replace("&", "&")` was a no-op)
            text = text.replace("&amp;", "&")
            # find the date; skip files without a parsable date instead of crashing
            dateMatch = re.search(r'<date value="([\d-]+)"', text)
            if dateMatch is None:
                continue
            if dateMatch.group(1).startswith(filter):  # only the requested year
                for tg in re.findall(r"(tgn,\d+)", text):  # finding the tgn info
                    tgn = tg.split(",")[1]  # keep only the numeric TGN id
                    topCountDictionary[tgn] = topCountDictionary.get(tgn, 0) + 1

    # one TSV row per toponym: zero-padded frequency, tgn id, timestamp
    top_TSV = ["%09d\t%s\t01-01-%s" % (frequency, tgnnumber, filter)
               for tgnnumber, frequency in topCountDictionary.items()]

    # saving the data in TSV format (header line plus one line per toponym):
    header = "freq\ttgn\ttimestamp\n"
    with open("dispatch_toponyms_%s.tsv" % filter, "w", encoding="utf8") as f9:
        f9.write(header + "\n".join(top_TSV))

# using our function for different years:
# run the extraction for every war year covered by the dispatch corpus:
for year in ("1861", "1862", "1863", "1864", "1865"):
    generate(year)

def _decimal_coordinate(axisXML):
    """Convert one <Latitude>/<Longitude> XML fragment to a decimal-degree string.

    Degrees and minutes are combined as abs(degrees) + minutes/60 and the sign
    is taken from the <Decimal> tag, which carries the direction (the original
    code added "-" in front of an already-negative degree value, producing
    invalid strings like "--44.5"). Returns "NA" when any required tag is
    missing or empty.
    """
    decMatch = re.search(r"<Decimal>(.*)</Decimal>", axisXML)
    degMatch = re.search(r"<Degrees>(.*)</Degrees>", axisXML)
    minMatch = re.search(r"<Minutes>(.*)</Minutes>", axisXML)
    if decMatch is None or degMatch is None or minMatch is None:
        return "NA"
    try:
        # minutes / 60 gives the fractional part of the decimal degrees
        value = abs(float(degMatch.group(1))) + float(minMatch.group(1)) / 60
    except ValueError:  # some tags are empty
        return "NA"
    coord = str(value)
    if "-" in decMatch.group(1):  # <Decimal> shows the direction via its sign
        coord = "-" + coord
    return coord


def generateTGNdata(source):
    """Extract id, name and decimal coordinates from the TGN XML dumps.

    Loops over all "TGN*" files in `source`, splits them into <Subject>
    records, and writes two TAB-separated files: "tgn_data_light.tsv"
    (all places) and "tgn_data_light_NA.tsv" (places without usable
    coordinates, which are excluded from the final mapping).
    """
    lof = os.listdir(source)  # list of XML files in the source folder

    tgnList = []    # rows with (possibly NA) coordinates
    tgnListNA = []  # rows whose lat or lon is NA
    print("Looping through list of XML files now")
    for f in lof:
        if not f.startswith("TGN"):  # fileName test
            continue

        with open(source + f, "r", encoding="utf8") as f1:
            # one list entry per <Subject> record
            data = re.split("</Subject>", f1.read())

        for d in data:
            d = re.sub("\n +", "", d)  # collapse the pretty-printed layout
            if "Subject_ID" not in d:
                continue
            # SUBJECT ID
            placeID = re.search(r"Subject_ID=\"(\d+)\"", d).group(1)
            # NAME OF THE PLACE (first <Term_Text> in the record)
            placeName = re.search(r"<Term_Text>([^<]+)</Term_Text>", d).group(1)

            # COORDINATES: default to NA, fill in when the record has them
            lat = lon = "NA"
            if "<Coordinates>" in d:
                latGr = re.search(r"<Latitude>(.*)</Latitude>", d)
                lonGr = re.search(r"<Longitude>(.*)</Longitude>", d)
                if latGr is not None:
                    lat = _decimal_coordinate(latGr.group(1))
                if lonGr is not None:
                    lon = _decimal_coordinate(lonGr.group(1))

            tgnList.append("\t".join([placeID, placeName, lat, lon]))
            if lat == "NA" or lon == "NA":  # collect places we cannot map
                tgnListNA.append("\t".join([placeID, placeName, lat, lon]))

    # saving the files:
    header = "tgnID\tplacename\tlat\tlon\n"  # headers delimited by a TAB
    with open("tgn_data_light.tsv", "w", encoding="utf8") as f9a:
        f9a.write(header + "\n".join(tgnList))

    with open("tgn_data_light_NA.tsv", "w", encoding="utf8") as f9b:
        f9b.write(header + "\n".join(tgnListNA))

    print("TGN has %d items" % len(tgnList))
    print("%d items do not have coordinates" % len(tgnListNA))

# build the TGN gazetteer TSV files from the XML dumps in `source`
generateTGNdata(source)

Script 2:

import re, os, csv

# loading TGN data into a dictionary (with a function):
def loadTGN(tgnTSV):
    """Load the TGN gazetteer TSV into a dict keyed by TGN id.

    Each non-empty line is split on TABs; the first column (the TGN id)
    becomes the key and the full column list the value. The header line is
    kept under the key "tgnID", which never collides with the numeric ids.
    Blank lines are skipped, so a trailing newline no longer adds a bogus
    "" entry to the dictionary.
    """
    dic = {}
    with open(tgnTSV, "r", encoding="utf8") as f1:
        for line in f1.read().split("\n"):
            if not line:  # robustness: ignore empty lines
                continue
            cols = line.split("\t")
            dic[cols[0]] = cols
    return dic

# matching our data-files with the tgn-data:
def match(freqFile, dicToMatch):
    """Join a yearly toponym-frequency file with the TGN gazetteer dict.

    For every data row of `freqFile` (columns: freq, tgn, timestamp) the TGN
    id is looked up in `dicToMatch`; matched rows are written to
    "coord_<freqFile>" (places with coordinates) and "coord_NA_<freqFile>"
    (places whose lat or lon is NA). Ids missing from the gazetteer are
    printed and counted.
    """
    dataNew = []    # matched rows with coordinates
    dataNewNA = []  # matched rows without coordinates
    count = 0       # number of tgn ids not found in the gazetteer

    with open(freqFile, "r", encoding="utf8") as f1:
        for row in f1.read().split("\n")[1:]:  # [1:] skips the header line
            if not row:  # robustness: a trailing newline used to crash the split below
                continue
            # split once instead of three times; column order is freq, tgn, timestamp
            freq, tgnID, timestamp = row.split("\t")[:3]
            if tgnID in dicToMatch:
                val = "\t".join(dicToMatch[tgnID]) + "\t" + freq + "\t" + timestamp
                if "\tNA\t" in val:  # "\tNA\t" marks a missing lat or lon column
                    dataNewNA.append(val)
                else:
                    dataNew.append(val)
            else:
                # report which tgnID could not be found and its frequency
                print("%s (%d) not in TGN!" % (tgnID, int(freq)))
                count += 1

    header = "tgnID\tplacename\tlat\tlon\tfreq\ttime_stamp\n"
    # toponyms with coordinates:
    with open("coord_" + freqFile, "w", encoding="utf8") as f9a:
        f9a.write(header + "\n".join(dataNew))
    # toponyms without coordinates:
    with open("coord_NA_" + freqFile, "w", encoding="utf8") as f9b:
        f9b.write(header + "\n".join(dataNewNA))

    print("%d items have not been matched..." % count)


# load the cleaned-up Getty gazetteer once, then join every yearly
# toponym-frequency file against it:
dictionary = loadTGN("tgn_data_light.tsv")
for year in ("1861", "1862", "1863", "1864", "1865"):
    match("dispatch_toponyms_%s.tsv" % year, dictionary)

Results:

With these results (picture below), we can see that the coordinates are precise enough to make the final mapping in QGIS.

Image

Maps:

In QGIS, I added the data as a Delimited Text Layer and styled the layer with circle sizes, transparency, and labels for each year. Finally, I installed the Time Manager plugin to animate my maps over time and saved each build. I created a GIF file to see the changes over time.

Map

Below is each individual map created with the Time Manager plugin.

Map1

Map2

Map3

Map4

Map5