Commit a3730724 authored by iker_martin's avatar iker_martin
Browse files
parents eed0d373 ac8406db
This diff is collapsed.
This diff is collapsed.
import sys
import glob
import numpy as np
import pandas as pd
def getData(lineS, outData, tp, hasIter = False):
    """Store the ``time`` (and optionally ``iters``) value found in a line.

    lineS   -- iterable of ``key=value`` tokens of one already split line.
    outData -- mutable sequence that receives the parsed values.
    tp      -- index of ``outData`` where the time is written; the iteration
               count, when requested, is written at ``tp + 1``.
    hasIter -- when true, an ``iters=<int>`` token is parsed as well.

    A slot is only written when its key appears in the line, so a line
    without ``time`` (or ``iters``) leaves the slot untouched instead of
    failing with UnboundLocalError. Tokens without '=' are ignored.
    """
    time = None
    iters = None
    for token in lineS:
        k_v = token.split('=')
        if len(k_v) < 2:
            continue  # Not a key=value token
        if k_v[0] == "time":
            time = float(k_v[1])
        elif k_v[0] == "iters" and hasIter:
            iters = int(k_v[1])
    if time is not None:
        outData[tp] = time
    if hasIter and iters is not None:
        outData[tp+1] = iters
#-----------------------------------------------
def record(f, observation, line):
    """Fill ``observation`` with one entry read from ``line`` and ``f``.

    f           -- iterator of the remaining lines of the file.
    observation -- mutable sequence written in place (slots 1 to 14).
    line        -- first line of the entry, already read by the caller.
    """
    # General info: tokens 1..6 of the first line are integer key=value pairs
    header = line.split()
    for idx in range(1, 7):
        observation[idx] = int(header[idx].split('=')[1])

    # Process numbers: every token of the next line, stored from slot 7 on
    for offset, token in enumerate(next(f).split()):
        observation[7 + offset] = int(token.split('=')[1])

    # Data: four lines whose time goes to slots 9..12
    for slot in range(9, 13):
        getData(next(f).split(), observation, slot)

    # Last line: time in slot 13 and iteration count in slot 14
    getData(next(f).split(), observation, 13, True)
#-----------------------------------------------
def read_file(f, dataA, dataB, it):
    """Parse one global results file and append its records.

    f     -- iterable of text lines (for instance an open file).
    dataA -- list of 8-slot rows: N, %Async, Groups, Dist, Matrix, Time,
             Iters, TE. One row is appended per "Config" line.
    dataB -- list of 11-slot rows: N, %Async, NP, NS, Dist, Matrix, Time,
             Iters, TC, TS, TA. One row is appended per "Config" line.
    it    -- index of the last row already stored (-1 when both lists are
             empty).

    Returns the index of the last row stored after reading ``f``.

    Lines found before the first "Config" line of this file, and lines with
    a single token, are skipped: they used to raise IndexError or to
    overwrite the timings of the last record of the previous file.
    """
    def field(token):
        # "key=value," -> "value"
        return token.split('=')[1].split(',')[0]

    recording = False   # True once a Config line has been seen in this file
    resizes = 0         # Resize lines still expected for the current record
    timer = 0           # 3, 2, 1 -> TC, TS, TA; any other value -> TE
    previousNP = 0
    for line in f:
        lineS = line.split()
        if not lineS:
            continue

        if lineS[0] == "Config": # CONFIG LINE
            recording = True
            it += 1
            dataA.append([None]*8)
            dataB.append([None]*11)

            resizes = int(field(lineS[2]))
            matrix = int(field(lineS[3]))
            sdr = int(field(lineS[4]))
            adr = int(field(lineS[5])) # TODO: take it as a percentage
            time = float(lineS[7].split('=')[1])

            dataB[it][5] = matrix
            dataB[it][0] = sdr
            dataB[it][1] = adr
            dataB[it][6] = time
            dataB[it][4] = ""

            dataA[it][4] = matrix
            dataA[it][0] = sdr
            dataA[it][1] = adr
            dataA[it][5] = time
            dataA[it][3] = ""

        elif recording and resizes != 0: # RESIZE LINE
            iters = int(field(lineS[2]))
            npr = int(field(lineS[3]))
            dist = lineS[5].split('=')[1]

            dataB[it][7] = iters
            dataA[it][6] = iters

            resizes = resizes - 1
            if resizes == 0:
                # Last resize: close the comma separated strings and start
                # the countdown for the three malleability timings
                dataB[it][3] = npr
                dataB[it][4] += dist
                dataA[it][3] += dist
                dataA[it][2] = str(previousNP) + "," + str(npr)
                timer = 3
            else:
                dataB[it][2] = npr
                dataB[it][4] += dist + ","
                dataA[it][3] += dist + ","
                previousNP = npr

        elif recording and len(lineS) > 1: # SAVE TIMES
            if timer == 3:
                dataB[it][8] = float(lineS[1])
            elif timer == 2:
                dataB[it][9] = float(lineS[1])
            elif timer == 1:
                dataB[it][10] = float(lineS[1])
            else:
                dataA[it][7] = float(lineS[1])
            timer = timer - 1
    return it
#columnsA1 = ["N", "%Async", "Groups", "Dist", "Matrix", "Time", "Iters", "TE"] #7
#columnsB1 = ["N", "%Async", "NP", "NS", "Dist", "Matrix", "Time", "Iters", "TC", "TS", "TA"] #10
#Config loaded: resizes=2, matrix=1000, sdr=1000000000, adr=0, aib=0, time=2.000000 || grp=1
#Resize 0: Iters=100, Procs=2, Factors=1.000000, Phy=2
#Resize 1: Iters=100, Procs=4, Factors=0.500000, Phy=2
#Tspawn: 0.249393
#Tsync: 0.330391
#Tasync: 0
#Tex: 301.428615
#-----------------------------------------------
# Command line: resultsName [directory [csvOutName]]
if len(sys.argv) < 2:
    print("The files name is missing\nUsage: python3 " + sys.argv[0] + " resultsName directory csvOutName")
    sys.exit(1)

if len(sys.argv) >= 3:
    BaseDir = sys.argv[2]
    print("Searching in directory: "+ BaseDir)
else:
    # No directory given: search from the current one. Reading sys.argv[2]
    # here raised IndexError.
    BaseDir = ""

if len(sys.argv) >= 4:
    print("Csv name will be: " + sys.argv[3] + "_G.csv & " + sys.argv[3] + "_M.csv")
    name = sys.argv[3]
else:
    name = "data"

# Results are expected in first level subdirectories called Run*
insideDir = "Run"
lista = glob.glob("./" + BaseDir + insideDir + "*/" + sys.argv[1]+ "*Global.o*")
print("Number of files found: "+ str(len(lista)))

it = -1  # Index of the last stored row; read_file increments it per record
dataA = []
dataB = []
columnsA = ["N", "%Async", "Groups", "Dist", "Matrix", "Time", "Iters", "TE"] #7
columnsB = ["N", "%Async", "NP", "NS", "Dist", "Matrix", "Time", "Iters", "TC", "TS", "TA"] #10

for elem in lista:
    # The context manager closes the file even if parsing fails
    with open(elem, "r") as f:
        it = read_file(f, dataA, dataB, it)

#print(data)
dfA = pd.DataFrame(dataA, columns=columnsA)
dfA.to_csv(name + '_G.csv')

dfB = pd.DataFrame(dataB, columns=columnsB)
dfB.to_csv(name + '_M.csv')
import sys
import glob
import numpy as np
import pandas as pd
#-----------------------------------------------
def read_file(f, data, it):
    """Parse one per-group iteration file and append one row per observation.

    f    -- iterable of text lines (for instance an open file).
    data -- list of 11-slot rows: N, %Async, NP, N_par, NS, Matrix, Time,
            Iters, Ti, Tt, To. It is extended in place.
    it   -- number of rows stored by previous calls.

    Returns the number of rows stored once the last complete group of
    ``f`` has been read (``it`` unchanged if there was none).

    After a "Config Group" line the next three multi-token lines are taken,
    in order, as the values for slots 8, 9 and 10. The first one creates the
    rows, the other two fill them. Rows are located from the length of
    ``data`` and not from ``it``, so a group truncated at the end of a file
    no longer makes the following groups overwrite older rows. Surplus
    values on the second or third line are ignored.
    """
    matrix = 0
    sdr = 0
    adr = 0
    time = 0
    iters = 0
    # Named n_* so the module level numpy alias "np" is not shadowed
    n_procs = 0
    n_parents = 0
    n_sons = 0
    recording = False
    it_line = 0
    start = len(data)  # First row of the group being recorded

    for line in f:
        lineS = line.split()
        if len(lineS) <= 1:
            continue

        if recording:
            values = lineS[1:]  # Drop the label of the line
            if it_line == 0:
                start = len(data)
                for observation in values:
                    row = [None]*11
                    row[0] = sdr
                    row[1] = adr
                    row[2] = n_procs
                    row[3] = n_parents
                    row[4] = n_sons
                    row[5] = matrix
                    row[6] = time
                    row[7] = iters
                    row[8] = float(observation)
                    data.append(row)
                it_line = 1
            elif it_line == 1:
                for row, observation in zip(data[start:], values):
                    row[9] = float(observation)
                it_line = 2
            else:
                for row, observation in zip(data[start:], values):
                    row[10] = float(observation)
                it = len(data)
                recording = False
                it_line = 0
            continue

        # TODO: take adr as a percentage
        if lineS[0] == "Config:":
            matrix = int(lineS[1].split('=')[1].split(',')[0])
            sdr = int(lineS[2].split('=')[1].split(',')[0])
            adr = int(lineS[3].split('=')[1].split(',')[0])
            time = float(lineS[5].split('=')[1])
        elif lineS[0] == "Config":
            recording = True
            iters = int(lineS[2].split('=')[1].split(',')[0])
            n_procs = int(lineS[5].split('=')[1].split(',')[0])
            n_parents = int(lineS[6].split('=')[1].split(',')[0])
            n_sons = int(float(lineS[7].split('=')[1]))
    return it
#-----------------------------------------------
#Config: matrix=1000, sdr=1000000000, adr=0, aib=0 time=2.000000
#Config Group: iters=100, factor=1.000000, phy=2, procs=2, parents=0, sons=4
#Ttype: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# Command line: resultsName [directory [csvOutName]]
if len(sys.argv) < 2:
    print("The files name is missing\nUsage: python3 " + sys.argv[0] + " resultsName directory csvOutName")
    sys.exit(1)

if len(sys.argv) >= 3:
    BaseDir = sys.argv[2]
    print("Searching in directory: "+ BaseDir)
else:
    # No directory given: search from the current one. Reading sys.argv[2]
    # here raised IndexError.
    BaseDir = ""

if len(sys.argv) >= 4:
    print("Csv name will be: " + sys.argv[3] + ".csv")
    name = sys.argv[3]
else:
    name = "data"

# Results are expected in first level subdirectories called Run*
insideDir = "Run"
lista = glob.glob("./" + BaseDir + insideDir + "*/" + sys.argv[1]+ "*ID*.o*")
print("Number of files found: "+ str(len(lista)))

it = 0  # Number of rows stored so far
data = [] #0 #1 #2 #3 #4 #5 #6 #7 #8 #9 #10
columns = ["N", "%Async", "NP", "N_par", "NS", "Matrix", "Time", "Iters", "Ti", "Tt", "To"] #11

for elem in lista:
    # The context manager closes the file even if parsing fails
    with open(elem, "r") as f:
        it = read_file(f, data, it)

#print(data)
df = pd.DataFrame(data, columns=columns)
df.to_csv(name + '.csv')
import sys
import glob
import numpy as np
import pandas as pd
def getData(lineS, outData, tp, hasIter = False):
    """Store the ``time`` (and optionally ``iters``) value found in a line.

    lineS   -- iterable of ``key=value`` tokens of one already split line.
    outData -- mutable sequence that receives the parsed values.
    tp      -- index of ``outData`` where the time is written; the iteration
               count, when requested, is written at ``tp + 1``.
    hasIter -- when true, an ``iters=<int>`` token is parsed as well.

    A slot is only written when its key appears in the line, so a line
    without ``time`` (or ``iters``) leaves the slot untouched instead of
    failing with UnboundLocalError. Tokens without '=' are ignored.
    """
    found_time = None
    found_iters = None
    for token in lineS:
        k_v = token.split('=')
        if len(k_v) < 2:
            continue  # Not a key=value token
        if k_v[0] == "time":
            found_time = float(k_v[1])
        elif k_v[0] == "iters" and hasIter:
            found_iters = int(k_v[1])
    if found_time is not None:
        outData[tp] = found_time
    if hasIter and found_iters is not None:
        outData[tp+1] = found_iters
#-----------------------------------------------
def record(f, observation, line):
    """Fill ``observation`` with one entry read from ``line`` and ``f``.

    f           -- iterator of the remaining lines of the file.
    observation -- mutable sequence written in place (slots 1 to 14).
    line        -- first line of the entry, already read by the caller.
    """
    # General info: tokens 1..6 of the first line are integer key=value pairs
    first_tokens = line.split()
    for pos in range(1, 7):
        observation[pos] = int(first_tokens[pos].split('=')[1])

    # Process numbers: every token of the next line, stored from slot 7 on
    slot = 7
    for pair in next(f).split():
        observation[slot] = int(pair.split('=')[1])
        slot += 1

    # Data: four lines whose time goes to slots 9..12
    for slot in (9, 10, 11, 12):
        getData(next(f).split(), observation, slot)

    # Last line: time in slot 13 and iteration count in slot 14
    getData(next(f).split(), observation, 13, True)
#-----------------------------------------------
def read_file(f, dataA, dataB, it):
    """Parse one global results file and append its records.

    f     -- iterable of text lines (for instance an open file).
    dataA -- list of 9-slot rows: N, %Async, Groups, Dist, Matrix, CommTam,
             Time, Iters, TE. One row is appended per "Config" line.
    dataB -- list of 12-slot rows: N, %Async, NP, NS, Dist, Matrix, CommTam,
             Time, Iters, TC, TS, TA. One row is appended per "Config" line.
    it    -- index of the last row already stored (-1 when both lists are
             empty).

    Returns the index of the last row stored after reading ``f``.

    Dist and Iters are comma separated strings with one entry per resize.
    Lines found before the first "Config" line of this file, and lines with
    a single token, are skipped: they used to raise IndexError or to
    overwrite the timings of the last record of the previous file.
    """
    def field(token):
        # "key=value," -> "value"
        return token.split('=')[1].split(',')[0]

    recording = False   # True once a Config line has been seen in this file
    resizes = 0         # Resize lines still expected for the current record
    timer = 0           # 3, 2, 1 -> TC, TS, TA; any other value -> TE
    previousNP = 0
    for line in f:
        lineS = line.split()
        if not lineS:
            continue

        if lineS[0] == "Config": # CONFIG LINE
            recording = True
            it += 1
            dataA.append([None]*9)
            dataB.append([None]*12)

            resizes = int(field(lineS[2]))
            compute_tam = int(field(lineS[3]))
            comm_tam = int(field(lineS[4]))
            sdr = int(field(lineS[5]))
            adr = int(field(lineS[6])) # TODO: take it as a percentage
            # TODO: also read aib
            time = float(lineS[8].split('=')[1])

            dataB[it][0] = sdr
            dataB[it][1] = adr
            dataB[it][4] = ""
            dataB[it][5] = compute_tam
            dataB[it][6] = comm_tam
            dataB[it][7] = time
            dataB[it][8] = ""

            dataA[it][0] = sdr
            dataA[it][1] = adr
            dataA[it][3] = ""
            dataA[it][4] = compute_tam
            dataA[it][5] = comm_tam
            dataA[it][6] = time
            dataA[it][7] = ""

        elif recording and resizes != 0: # RESIZE LINE
            iters = int(field(lineS[2]))
            npr = int(field(lineS[3]))
            dist = lineS[5].split('=')[1]

            resizes = resizes - 1
            if resizes == 0:
                # Last resize: close the comma separated strings and start
                # the countdown for the three malleability timings
                dataB[it][3] = npr
                dataB[it][4] += dist
                dataB[it][8] += str(iters)

                dataA[it][2] = str(previousNP) + "," + str(npr)
                dataA[it][3] += dist
                dataA[it][7] += str(iters)
                timer = 3
            else:
                dataB[it][2] = npr
                dataB[it][4] += dist + ","
                dataB[it][8] += str(iters) + ","

                dataA[it][3] += dist + ","
                dataA[it][7] += str(iters) + ","
                previousNP = npr

        elif recording and len(lineS) > 1: # SAVE TIMES
            if timer == 3:
                dataB[it][9] = float(lineS[1])
            elif timer == 2:
                dataB[it][10] = float(lineS[1])
            elif timer == 1:
                dataB[it][11] = float(lineS[1])
            else:
                dataA[it][8] = float(lineS[1])
            timer = timer - 1
    return it
#columnsA1 = ["N", "%Async", "Groups", "Dist", "Matrix", "CommTam", "Time", "Iters", "TE"] #8
#columnsB1 = ["N", "%Async", "NP", "NS", "Dist", "Matrix", "CommTam", "Time", "Iters", "TC", "TS", "TA"] #11
#Config loaded: resizes=2, matrix=1000, sdr=1000000000, adr=0, aib=0, time=2.000000 || grp=1
#Resize 0: Iters=100, Procs=2, Factors=1.000000, Phy=2
#Resize 1: Iters=100, Procs=4, Factors=0.500000, Phy=2
#Tspawn: 0.249393
#Tsync: 0.330391
#Tasync: 0
#Tex: 301.428615
#-----------------------------------------------
# Command line: resultsName [directory [csvOutName]]
if len(sys.argv) < 2:
    print("The files name is missing\nUsage: python3 " + sys.argv[0] + " resultsName directory csvOutName")
    sys.exit(1)

if len(sys.argv) >= 3:
    BaseDir = sys.argv[2]
    print("Searching in directory: "+ BaseDir)
else:
    # No directory given: search from the current one. Reading sys.argv[2]
    # here raised IndexError.
    BaseDir = ""

if len(sys.argv) >= 4:
    print("Csv name will be: " + sys.argv[3] + "G.csv & " + sys.argv[3] + "M.csv")
    name = sys.argv[3]
else:
    name = "data"

# Results are expected in first level subdirectories called Run*
insideDir = "Run"
lista = glob.glob("./" + BaseDir + insideDir + "*/" + sys.argv[1]+ "*Global.o*")
print("Number of files found: "+ str(len(lista)))

it = -1  # Index of the last stored row; read_file increments it per record
dataA = []
dataB = []
columnsA = ["N", "%Async", "Groups", "Dist", "Matrix", "CommTam", "Time", "Iters", "TE"] #8
columnsB = ["N", "%Async", "NP", "NS", "Dist", "Matrix", "CommTam", "Time", "Iters", "TC", "TS", "TA"] #11

for elem in lista:
    # The context manager closes the file even if parsing fails
    with open(elem, "r") as f:
        it = read_file(f, dataA, dataB, it)

#print(data)
dfA = pd.DataFrame(dataA, columns=columnsA)
dfA.to_csv(name + 'G.csv')

dfB = pd.DataFrame(dataB, columns=columnsB)
dfB.to_csv(name + 'M.csv')
Esta carpeta contiene códigos para poder analizar los resultados obtenidos.
Para utilizar los códigos es necesario Python con los módulos Numpy y Pandas.
El código analyser.ipynb necesita, además, la aplicación JupyterLab.
Los códigos son los siguientes:
- Malltimes.py: Recoge los tiempos globales de maleabilidad y ejecución de todos los ficheros pasados como argumento y
los almacena en dos ficheros CSV para ser utilizados en analyser.ipynb
- Itertimes.py: Recoge los tiempos locales de iteraciones de un grupo de procesos de todos los ficheros pasados como
argumento y los almacena en un fichero CSV para ser utilizado en analyser.ipynb
+ Ejemplo de uso de ambos códigos (Esperan los mismos argumentos):
python3 Malltimes.py NombreFicheros DirectorioFicheros/ NombreCSV
NombreFicheros: La parte común de los ficheros, los códigos buscan solo aquellos nombres que empiecen por esta cadena.
Por defecto, con poner "R" es suficiente.
DirectorioFicheros/: Nombre del directorio donde se encuentran todos los resultados. Está pensado para que busque
en todos los subdirectorios que tenga en el primer nivel, pero no en segundos niveles o más.
NombreCSV: Nombre del fichero CSV en el que escribir la recopilación de resultados.
- analyser.ipynb: Código para ser ejecutado con Jupyter Notebook. Dentro del mismo hay que indicar los nombres de los
ficheros CSV a analizar y, tras ello, ejecutar las celdas. Como resultado se obtienen tres ficheros XLSX e imágenes
en el directorio "Images", y además varios resultados sobre T-test entre varios resultados que se reflejan como
output en la salida estándar de Jupyter Notebook.
This diff is collapsed.
This diff is collapsed.
import sys
import glob
import numpy as np
import pandas as pd
#-----------------------------------------------
def read_file(f, data, it):
    """Parse one per-group iteration file and append one row per observation.

    f    -- iterable of text lines (for instance an open file).
    data -- list of 13-slot rows: N, %Async, NP, N_par, NS, Dist,
            Compute_tam, Comm_tam, Time, Iters, Ti, Tt, To. It is extended
            in place.
    it   -- number of rows stored by previous calls.

    Returns the number of rows stored once the last complete group of
    ``f`` has been read (``it`` unchanged if there was none).

    After a "Config Group" line, the next three lines labelled Titer, Ttype
    or Top are taken, in order of appearance, as the values for slots 10, 11
    and 12. The first one creates the rows, the other two fill them. Rows
    are located from the length of ``data`` and not from ``it``, and every
    "Config Group" line restarts the count of value lines, so a truncated
    group no longer makes the following groups overwrite older rows.
    Surplus values on the second or third line are ignored.
    """
    labels = ('Titer', 'Ttype', 'Top')
    compute_tam = 0
    comm_tam = 0
    sdr = 0
    adr = 0
    dist = 0
    time = 0
    iters = 0
    # Named n_* so the module level numpy alias "np" is not shadowed
    n_procs = 0
    n_parents = 0
    n_sons = 0
    recording = False
    it_line = 0
    start = len(data)  # First row of the group being recorded

    for line in f:
        lineS = line.split()
        if len(lineS) <= 1:
            continue

        if recording and lineS[0].split(':')[0] in labels: # Record data
            values = lineS[1:]  # Drop the label of the line
            if it_line == 0:
                start = len(data)
                for observation in values:
                    row = [None]*13
                    row[0] = sdr
                    row[1] = adr
                    row[2] = n_procs
                    row[3] = n_parents
                    row[4] = n_sons
                    row[5] = dist
                    row[6] = compute_tam
                    row[7] = comm_tam
                    row[8] = time
                    row[9] = iters
                    row[10] = float(observation)
                    data.append(row)
            elif it_line == 1:
                for row, observation in zip(data[start:], values):
                    row[11] = float(observation)
            else:
                for row, observation in zip(data[start:], values):
                    row[12] = float(observation)

            it_line += 1
            if it_line % 3 == 0: # Check whether this execution is fully read
                recording = False
                it_line = 0
                it = len(data)
            continue

        if lineS[0] == "Config:":
            compute_tam = int(lineS[1].split('=')[1].split(',')[0])
            comm_tam = int(lineS[2].split('=')[1].split(',')[0])
            sdr = int(lineS[3].split('=')[1].split(',')[0])
            adr = int(lineS[4].split('=')[1].split(',')[0])
            time = float(lineS[6].split('=')[1])
        elif lineS[0] == "Config":
            recording = True
            it_line = 0  # A new group always starts with its first value line
            iters = int(lineS[2].split('=')[1].split(',')[0])
            dist = int(lineS[4].split('=')[1].split(',')[0])
            n_procs = int(lineS[5].split('=')[1].split(',')[0])
            n_parents = int(lineS[6].split('=')[1].split(',')[0])
            n_sons = int(float(lineS[7].split('=')[1]))
    return it
#-----------------------------------------------
#Config: matrix=1000, sdr=1000000000, adr=0, aib=0 time=2.000000
#Config Group: iters=100, factor=1.000000, phy=2, procs=2, parents=0, sons=4
#Ttype: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# Command line: resultsName [directory [csvOutName]]
if len(sys.argv) < 2:
    print("The files name is missing\nUsage: python3 " + sys.argv[0] + " resultsName directory csvOutName")
    sys.exit(1)

if len(sys.argv) >= 3:
    BaseDir = sys.argv[2]
    print("Searching in directory: "+ BaseDir)
else:
    # No directory given: search from the current one. Reading sys.argv[2]
    # here raised IndexError.
    BaseDir = ""

if len(sys.argv) >= 4:
    print("Csv name will be: " + sys.argv[3] + ".csv")
    name = sys.argv[3]
else:
    name = "data"

# Results are expected in first level subdirectories called Run*
insideDir = "Run"
lista = glob.glob("./" + BaseDir + insideDir + "*/" + sys.argv[1]+ "*ID*.o*")
print("Number of files found: "+ str(len(lista)))

it = 0  # Number of rows stored so far
data = [] #0 #1 #2 #3 #4 #5 #6 #7 #8 #9 #10 #11 #12
columns = ["N", "%Async", "NP", "N_par", "NS", "Dist", "Compute_tam", "Comm_tam", "Time", "Iters", "Ti", "Tt", "To"] #13

for elem in lista:
    # The context manager closes the file even if parsing fails
    with open(elem, "r") as f:
        it = read_file(f, data, it)

#print(data)
df = pd.DataFrame(data, columns=columns)

# N becomes the sum of both columns and %Async the share of that sum, as a
# percentage
df['N'] += df['%Async']
df['%Async'] = (df['%Async'] / df['N']) * 100
df.to_csv(name + '.csv')
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment