#!/usr/bin/python # -*- coding: utf-8 -*- ## Author: Jonathan Newnham ## Date: 2009-06-23 ## ## This program takes data from Lenneke's experiments and presents nice ## maps of the data. ## The data is from a 2d area of silicon (several umetres) that was ## scanned with an ion beam and charge collection events and their ## associated energies and the position of the beam were recorded. The ## data is read in from an .evt (event) file (structure given on ~p65 of ## MpSys3 Users' Manual) that has been converted to a .csv. ## ## The .csv is expected to be in the format ## Energy,X,Y,Station ## with a header row. ## ## This version handles both stations at once. ## This program generates no output - call matshowj() or doplot() for that. import numpy, matplotlib, pylab, pprint, math, os, sys # matplotlib.rc('text', usetex=True) # allow use of tex commands in labels stations = [1,3] # which stations to generate data for saveexts = [".png"] # Save plots as this file type (extension) colormap = pylab.cm.spectral_r # Color scheme to use for plots FirstPassOnly = False # restrict data to first pass FilterHighEnergies = False # remove high-energy events FilterPairs = False # remove events that only generated signal at one electrode FilterMedians = False # only take the median event for each (x,y) pair FilterProfile = True # remove all events except those on a line between terminals FilterProfile_line_width = 30 # line width in pixels FilterArea = False # Filter a certain area of the image AreaMean = False # calculate the mean over an area Rotation = 0 # Rotation (counterclockwise about origin) in degrees. Scale = 12./60 # micron / px EnergyScale = 2000. / 450 # keV / channel # .csv file to read from (default) path="/home/j/Documents/uni/physics/research/locateIonVacWork/two-terminal-detection/data/" path="/home/j/Documents/uni/physics/research/locateIonVacWork/two-terminal-detection/data-2006/s107 -20060919/" fname = "e3.evt.evt" datfile = path+fname # Read filename from first argument if exists if len(sys.argv) > 1: datfile = sys.argv[1] if os.path.exists(datfile): path, fname = os.path.split(datfile) print "Using %s as data file."%fname else: print "File not found: %s"%datfile raise Exception("Doh") if os.path.exists(datfile+'.npy'): print "Loading from cache" events = numpy.load(datfile+'.npy') print "Converting to lists..." events = events.tolist() else: f = open(datfile, 'r') print "Reading... " lines = f.readlines()[1:] # ignore header row f.close() ## events: list[(Energy, x, y, station)] print "Parsing... " lines = [line.split(",") for line in lines] events = [(int(line[0]), int(line[1]), int(line[2]), int(line[3])) for line in lines if len(line)>3] print "Saving to dat file..." numpy.save(datfile, numpy.array(events)) # Perform rotation if Rotation > 0: print "Rotating... " c = math.cos(Rotation * math.pi / 180) s = math.sin(Rotation * math.pi / 180) events = [(e[0], int(e[1] * c - e[2] * s), int(e[1]*s + e[2]*c), e[3]) for e in events] # find minimum and maximum x,y so plot sensible print "Properties... " minx = min([event[1] for event in events]) maxx = max([event[1] for event in events]) miny = min([event[2] for event in events]) maxy = max([event[2] for event in events]) sizex = maxx - minx + 1 sizey = maxy - miny + 1 if FilterArea: print "Filtering..." events = [e for e in events if e[1] > minx + 96 and e[1] < minx + 124 and e[2] > miny + 121 and e[2] < miny + 155] # find some data about the energies energies = numpy.array([event[0] for event in events]) energies.sort() emean, estddev = numpy.mean(energies), numpy.std(energies) emedian = energies[len(energies)//2] # Remove all events after y-value changes direction. if FirstPassOnly: # find direction y0 = events[0][2] index = 0 while events[index][2] == y0: index+=1 # find change in direction based on direction if events[index][2] > y0: while events[index][2] >= events[index-1][2]: index += 1 else: while events[index][2] <= events[index-1][2]: index += 1 events = events[:index] # take slice up to change in direction # filter high energies a long way from the mean if FilterHighEnergies: events = [event for event in events if event[0] < emean+2*estddev] # filter data so that only events that generated signal at both stations # are permitted def pair(event_a, event_b): '''two events form a pair if they have the same coordinate but a different station.''' return (event_a[1] == event_b[1] and event_a[2] == event_b[2] and event_a[3] != event_b[3]) pairedEvents = events[:] if FilterPairs: # step through backwards so removing items doesn't mess up our indexing print "Pairing... ", for i in range(len(events)-2,0,-1): if not (pair(events[i-1], events[i]) or pair(events[i], events[i+1])): pairedEvents.pop(i) # Pair up events (if choice between pairing with next or previous events # arises, choose the previous.) pairs = [] def pairRandom(): while len(events) > 1: if pair(events[0], events[1]): pairs.append((events[0], events[1])) events.pop(0) events.pop(0) pairs_made = False def pairCheat(): ''' Cheat while pairing - if there is a choice, choose the neibouring pair that will get us closest to a total energy of 450. 450 was chosen as that is the peak of the energy spectrum (presumably where most ions that went through an electrode ended up, and thus a good measure of the total energy).''' global pairs_made if pairs_made == True: return print "Pairing with cheating." target = 450 while len(events) > 2: if pair(events[0], events[1]): if pair(events[1], events[2]): # can pair #1 either way - choose whichever takes us closer # to target e0, e1, e2 = events[0][0], events[1][0], events[2][0] if abs(target - (e0 + e1)) < abs(target - (e1 + e2)): pairs.append((events[0], events[1])) events.pop(0) else: pairs.append((events[1], events[2])) events.pop(0) # discard event 0: 2 pairs with 1 events.pop(0) # better than 1 pairs with 0. else: # 1 and 2 aren't a pair - must pair 0 and 1. pairs.append((events[0], events[1])) events.pop(0) events.pop(0) pairs_made = True ## filter out pairs with energies greater than 450 # pairs = [pair for pair in pairs if pair[0][0] < 450 and pair[1][0] < 450] events = pairedEvents if FilterProfile: # Take a profile. # Events will be transformed to have a coordinate (l) which represents # the distance along this line, and a coordinate (d) which is the shortest distance to the line. # ref http://mathworld.wolfram.com/Point-LineDistance2-Dimensional.html def fittoline(x1, y1, x2, y2): print "Fitting to line... " global events, events_line L = math.sqrt((x2 - x1)**2 + (y2 - y1)**2) def l(x0, y0): # distance along line return 1/L * ((x0 - x1)*(x2 - x1) + (y0 - y1)*(y2 - y1)) def d(x0, y0): # distance to closest point on line return (1/L * ((x2 - x1)*(y1 - y0) - (x1 - x0)*(y2 - y1))) events = [e for e in events if abs(d(e[1], e[2])) < FilterProfile_line_width] # filter events far from line # events_line: [(Energy, l, station)] events_line = [(e[0], l(e[1],e[2])*Scale, d(e[1],e[2])*Scale, e[3]) for e in events] # Perform the contraction (/rotation) fittoline(x1 = minx+121, y1=miny+105, x2 = minx+213, y2 = miny+110) # sort events into lists for each station station_events = [[event for event in events if event[3] == station] for station in stations] print "Generating matrices... " # Build x-y matrix: each "site" contains a list of (Energy, station) pairs # y index row, x column. y outer index. eventmatrix = [[[[] for x in range(sizex)] for y in range(sizey)] for station in stations] for station, station_evts in zip(range(len(station_events)), station_events): for event in station_evts: # append energy of event to list of energies at this coordinate eventmatrix[station][event[2]-miny][event[1]-minx].append(event[0]) # Generate some matrices: # Find median, mean energy of each site medians = [numpy.zeros((sizey, sizex), dtype=int) for station in stations] means = [numpy.zeros((sizey, sizex), dtype=int) for station in stations] stddevs = [numpy.zeros((sizey, sizex), dtype=int) for station in stations] numbers = [numpy.zeros((sizey, sizex), dtype=int) for station in stations] for station in range(len(station_events)): for y in range(len(eventmatrix[station])): for x in range(len(eventmatrix[station][0])): cell = eventmatrix[station][y][x] # sort events in this cell in order of increasing energy # (for median) cell.sort() if len(cell) > 0: # set values in various matrices for this site medians[station][y,x] = cell[(len(cell)-1)//2] means[station][y,x] = numpy.mean([energy for energy in cell]) stddevs[station][y,x] = numpy.std([energy for energy in cell]) numbers[station][y,x] = len(cell) if FilterMedians: # Remove all events except the median at each site events = [(medians[station][y,x], x+minx, y+miny, stations[station]) for station in range(len(stations)) for y in range(len(eventmatrix[station])) for x in range(len(eventmatrix[station][0])) if medians[station][y,x] > 0] if FilterProfile: # events_line: [(energy, position_along_line)] events_line = [(e[0], l(e[1],e[2])*Scale, d(e[1], e[2])*Scale, e[3]) for e in events] print "Finished Setup." def matshowj(matrix): '''Plot a matrix in an interactive window. May have to call pylab.show() to make it visible.''' pylab.matshow(matrix, cmap=colormap) pylab.colorbar(shrink=0.8) pylab.xlabel("Position in x coordinate") pylab.ylabel("Position in y coordinate") def doplot(desc=''): '''Plot and save matrices to file.''' for i, station in zip(range(len(stations)), stations): matshowj(medians[i]) pylab.title("Median energy (channel)") savefig(desc+'station%s-median'%str(station)) matshowj(means[i]) pylab.title("Mean energy (channel)") savefig(desc+'station%s-mean'%str(station)) matshowj(stddevs[i]) pylab.title("Std. Dev. energy (channel)") savefig(desc+'station%s-stddev'%str(station)) matshowj(numbers[i]) pylab.title("Counts") savefig(desc+'station%s-counts'%str(station)) def savefig(fnamedetail = ""): for saveext in saveexts: pylab.savefig("%s-%s%s"%(fname,fnamedetail,saveext)) # calculate 3-mean (average over a manhattan radius of 3) of median values if AreaMean: m = 2 # radius of square to average over medians_mean = [numpy.zeros((sizey,sizex), dtype=int) for station in stations] for station in range(len(station_events)): for y in range(sizey) : for x in range(sizex): nearby_values = [e for e in medians[station][y-m:y+m,x-m:x+m].flatten() if e > 0] if len(nearby_values) > 0: medians_mean[station][y,x] = numpy.mean(nearby_values) def profiley(): """for the x209n12 dataset, a y slice through the region of interest is for y = 1980. Have a look at this to see if it's clean.""" colors = {1:'white', 3:'black'} for station in stations: slice_events = [e for e in events if e[2] == 1980] xs = [e[1] for e in slice_events if e[3] == station] es = [e[0] for e in slice_events if e[3] == station] pylab.scatter(xs, es, c=colors[station]) pylab.show() def profilex(): """for the x209n12 dataset, an x slice through the region of interest x in [2060, 2080).""" colors = {1:'white', 3:'black'} xs = [2070] for station in stations: slice_events = [e for e in events if e[1] in xs] ys = [e[2] for e in slice_events if e[3] == station] es = [e[0] for e in slice_events if e[3] == station] pylab.scatter(ys, es, c=colors[station], s=5) #figure() #pylab.hexbin(ys, es, cmap=colormap) #pylab.colorbar(shrink=0.8) pylab.show() def profilel_ehexbin(gridsize=100): """Plot energy vs. position along line, as a 2D histogram for each station individually. gridsize gives the resolution - higher is finer. """ for station in stations: ls = [e[1] for e in events_line if e[3] == station] es = [e[0] for e in events_line if e[3] == station] #pylab.scatter(ys, es, c=colors[station], s=5) pylab.figure() pylab.hexbin(ls, es, cmap=colormap, gridsize=gridsize) pylab.colorbar(shrink=0.8) pylab.title("Number of events") pylab.xlabel("Position along line") pylab.ylabel("Energy of events") pylab.show() def profilel_dhexbin(gridsize=100): """ Plot position along line vs. distance from line to check angle fit. """ for station in stations: pylab.figure() els = [e for e in events_line if e[3] == station] es, ls, ds, ss = zip(*els) pylab.hexbin(ls, ds, gridsize=gridsize, cmap=colormap) pylab.colorbar(shrink=0.8) pylab.title("Number of events") pylab.xlabel("Position along line") pylab.ylabel("Distance from line") def profilespectrumd(): """Now that we have energy profiles along the line, look at them more clearly with a spectrum at certain distances along the line.""" positions = [0,10,20,30,40,50,60] position_allowed_error = 5 for station in stations: pylab.figure() events_line_station = [e for e in events_line if e[3] == station] for position in positions: events_pos = [e for e in events_line_station if e[1] > position - position_allowed_error and e[1] < position + position_allowed_error] pylab.hist([e[0] for e in events_pos], histtype='step', label="position = %d +/- %d"%(position, position_allowed_error), linewidth = positions.index(position) + 1 ) pylab.legend() pylab.title("Station %d" % station) pylab.ylabel("Number density of events (arb units)") pylab.xlabel("Energy of events") pylab.show() def profilespectrume(): """Take a profile horizontally instead of vertically.""" # Scale everything to nice units... 0.32 micron/px -- Lenneke Book 1 p40. global events_line events_line = [(e[0]*EnergyScale - 100, e[1]*Scale, e[2]*Scale, e[3]) for e in events_line] energies = [400] energy_allowed_error = 100 for station in stations: pylab.figure() i = -1 colors = ['b','r','g','k'] linewidths = [4,3,2] for energy in energies: i = i + 1 events_egy = [e[1] for e in events_line if e[3] == station and e[0] > energy - energy_allowed_error and e[0] < energy + energy_allowed_error] if len(events_egy) == 0: print station, energy else: h = pylab.hist(events_egy, histtype='step', bins=250, label="energy = %d +/- %d"%(energy, energy_allowed_error), #linewidth = energies.index(energy) + 1 normed=True, linewidth=linewidths[i], color = colors[i] ) # add a gaussian-fit error bar at each peak pylab.errorbar(x= numpy.mean(events_egy), y= max(h[0]), xerr = numpy.std(events_egy), fmt = colors[i]+'o' ) print "Channel = %6.2f, Line Position Mean = %6.2f, Line Position Std = %6.2f"%( energy, numpy.mean(events_egy), numpy.std(events_egy)) pylab.legend() pylab.title("Station %d" % station) pylab.ylabel("Number density of events (normalized)") pylab.xlabel(r"Position along line (μm)") def pair_profile(e1, e2): """Return True if e1 and e2 form a pair -- sensible total energy, same coordinate, different station. In sensible units.""" return (abs(e1[0] + e2[0] - 2000) < 400) and e1[1] == e2[1] and e1[3] != e2[3] def paired_line_events(): # pair up events: [(event 1, event 2)] # filter events with line coord < 0, they stuff up the graph global events_line events_line = [e for e in events_line if e[1] >= 0] paired_events = [] # step through backwards so removing items doesn't mess up our indexing i = len(events_line)-1 while i >= 0: i = i - 1 if pair_profile(events_line[i+1], events_line[i]): paired_events.append((events_line[i], events_line[i+1])) i = i - 1 print "Paired %d events"%len(paired_events) return paired_events def profilel_scatter(): """Plot energy vs. position along line, as a scatter plot for both stations on the same graph.""" # Scale everything to nice units... global events_line events_line = [(e[0]*EnergyScale - 100, e[1]*.3, e[2]*.3, e[3]) for e in events_line] colors = {1:'white', 3:'black'} # pair up the events... paired_events = paired_line_events() e1s, e2s = zip(*paired_events) events_line = e1s + e2s # plot stuff for station in stations: ls = [e[1] for e in events_line if e[3] == station] es = [e[0] for e in events_line if e[3] == station] pylab.scatter(ls, es, c=colors[station], s=50) pylab.xlabel(u"Distance along line between electrodes (μm)") pylab.ylabel("Energy of events (keV)") # Join up pairs of points with black lines for e1, e2 in paired_events: pylab.plot([e1[1], e2[1]], [e1[0], e2[0]], 'k-') # format nicely pylab.xlim(0, 20) pylab.ylim(0, 2000) savefig('paired-plot') pylab.show() def testplot(): '''Plot x vs. E with y axis in colour.''' for y in range(miny, miny+10): evts_y = [e for e in events if e[2] == y] if len(evts_y) > 0: Es,xs,ys,ss = zip(*evts_y) pylab.scatter(xs,Es, color=str((y-miny)/10.)) pylab.xlabel("x (pixels)") pylab.ylabel("E (channel)") def densityplot(): '''Plot x vs. y with third axis as number of counts.''' matshowj(numbers[0]) pylab.title("Density of events") def plotcounts(): pylab.matshow(numbers[0], cmap=pylab.cm.gray_r) pylab.colorbar(shrink=0.8) pylab.xlabel("Position in x coordinate") pylab.ylabel("Position in y coordinate") pylab.title("Test Plot Counts") def usage(): print """Usage: to view data, call one of these functions: doplot profiley, profilex, profilel profilespectrumd profilespectrume profilel_ehexbin profilel_dhexbin matshowj (takes one argument, the matrix to display) means medians stddevs numbers """ def testfit(start, end, lowlen, highlen, lowheight, highheight, h): '''Return a number indicating how well these parameters fit the histogram.''' xs = numpy.arange(start, end, 0.1) ys = numpy.zeros(len(xs)) n = 0 i = 0 while i < len(xs): while i < len(xs) and xs[i] < start + n * (lowlen+highlen) + lowlen: ys[i] = lowheight i += 1 while i < len(xs) and xs[i] < start + (n+1)*(lowlen+highlen): ys[i] = highheight i += 1 n += 1 return xs,ys def lineAngleFit(): """Plot various angles for the line to find one that fits best""" for x2 in range(230, 300): pylab.figure() fittoline(x1 = minx+0, y1=miny+0, x2 = minx+x2, y2 = miny+250) profilel() savefig('linefit-x2=%d'%x2) def lineWidthFit(): """Plot various line widths to find the best one.""" for lw in [1,2,3,5,10,20,50,100,1000][::-1]: global FilterProfile_line_width FilterProfile_line_width = lw fittoline(x1 = minx+0, y1=miny+0, x2 = minx+236, y2 = miny+250) pylab.figure() profilel() savefig('linewidth-fit-lw=%d'%lw) def binWidthFit(): """Plot various bin widths to find the best one.""" for bw in numpy.arange(.25, 3, .05): fittoline(x1 = minx+0, y1=miny+0, x2 = minx+242, y2 = miny+250) pylab.figure() profilel(binWidth=bw) savefig("binwidth-fit-bw=%4.2f"%bw) def profilel(binWidth=1.4): '''Histogram counts vs. distance along line.''' es, ls, ds, ss = zip(*events_line) h = numpy.histogram(ls, bins=numpy.arange(0,max(ls),binWidth), normed=True ) pylab.plot(h[1][:-1]+h[1][1:] / 2, h[0], drawstyle='steps-mid') pylab.title("Test Grid Profile") pylab.xlabel("Position along line (um)") pylab.ylabel("Counts (normalized)") # Fit castle battlements to plot #xs,ys = testfit(start = 489, end = 1000, lowlen = 24, highlen = 40, lowheight = .0015, highheight = .0030, h=h) #pylab.plot(xs,ys,drawstyle='steps-pre') def SimEdgeWidth(): """Simulate the process. 3000 ions at each of ten positions, plot histogram, measure edge width.""" import random hits = [random.random()*10 + random.gauss(0,1) for x in range(10) for n in range(3000)] h = pylab.hist(hits, bins=range(math.floor(min(hits)), math.ceil(max(hits))+1), normed=True) hits2 = [random.random()*10 for x in range(10) for n in range(3000)] pylab.hist(hits2, bins=h[1], normed=True, histtype='step') def hexbin(): '''Plot E vs. E graph as a 2D histogram.''' pairCheat() pairs_energies = [(i[0][0], i[1][0]) for i in pairs] pylab.hexbin([i[0] for i in pairs_energies], [i[1] for i in pairs_energies], gridsize=100, bins='log', cmap=pylab.cm.spectral_r) pylab.axis([0,450,0,450]) pylab.colorbar(shrink=0.8) pylab.show() def hexbin_add(): pairCheat() pairs_energies = [(i[0][0], i[0][0] + i[1][0]) for i in pairs] pylab.hexbin([i[0] for i in pairs_energies], [i[1] for i in pairs_energies], gridsize=100, bins='log', cmap=pylab.cm.spectral_r) pylab.axis([0,450,0,450]) pylab.xlabel('Energy at station 1 (Channel)') pylab.ylabel('Total Energy (Channel)') pylab.colorbar(shrink=0.8) savefig('E-vs.-Total-E') def espectrum(): energies = numpy.array(zip(*events)[0]) energies = energies * EnergyScale h = numpy.histogram(energies, bins=numpy.arange(500)*EnergyScale) pylab.plot(h[1][1:], h[0], drawstyle='steps-mid') pylab.xlabel("Energy (keV)") pylab.ylabel("Counts") pylab.title("Energy spectrum si14 central region") def binnify(xs, ys, bin_start=None, bin_width=1, bin_end=None, reducing_function=None): if bin_start == None: bin_start = min(xs) if bin_end == None: bin_end = max(xs) bin_number = int((bin_end - bin_start) / bin_width + 1) binned = [[] for x in range(bin_number)] for x,y in zip(xs,ys): if x >= bin_start and x <= bin_end: bin_index = int((x - bin_start) / bin_width) binned[bin_index].append(y) if reducing_function != None: binned = [reducing_function(a) for a in binned] bin_edges = list(numpy.arange(bin_start, bin_end+1.5 * bin_width, bin_width)) if len(bin_edges) - 1 != len(binned): raise Exception("Wrong binning!: %d vs. %d"%(len(bin_edges), len(binned))) return binned, bin_edges def doabin(label, ylabel, redf, es, ls, station, drawstyle='line'): print "Performing binning (%s) for station %d"%(label, station) binned, bin_edges = binnify(ls, es, reducing_function = redf, bin_start = -1, bin_end = 16, bin_width = Scale) pylab.figure() pylab.plot(bin_edges[:-1], binned, drawstyle = drawstyle) pylab.xlabel('Distance from left-hand terminal (um)') pylab.xlim(0, 15) pylab.ylabel(ylabel) savefig('Station%d-%s'%(station,label)) def doplot_line(): for station in stations: els = [e for e in events_line if e[3] == station] es, ls, ds, ss = zip(*els) es = numpy.array(es) * EnergyScale doabin('mean', 'Mean energy (keV)', numpy.mean, es, ls, station) doabin('median', 'Median energy (keV)', numpy.median, es, ls, station) doabin('std', 'Std. Dev. in energy (keV)', numpy.std, es, ls, station) doabin('counts', 'number of events', len, es, ls, station, drawstyle='steps-mid') def gaussfit(): total_energies = numpy.array([(i[0][0] + i[1][0]) for i in pairs])*EnergyScale pylab.hist(total_energies, bins=numpy.arange(390*EnergyScale,480*EnergyScale,1*EnergyScale), histtype='step') fitfunc = lambda p, x: p[0]*scipy.exp(-(x-p[1])**2/(2.0*p[2]**2)) errfunc = lambda p, x, y: fitfunc(p,x)-y import scipy, scipy.optimize p0 = scipy.c_[4000, 430*EnergyScale, 30*EnergyScale] #scale, mean, std h = numpy.histogram(total_energies, bins=numpy.arange(390*EnergyScale,480*EnergyScale,1*EnergyScale)) p1, success = scipy.optimize.leastsq(errfunc, p0.copy()[0],args=(h[1][:-1],h[0])) corrfit = fitfunc(p1, h[1][:-1]) pylab.plot(h[1][:-1],corrfit) print "Standard deviation (keV): ", p1 pylab.xlabel("Total energy (keV)") pylab.ylabel("Number of events") savefig('E-vs.-E-gaussfit')