From 37242cc963fcd1fa91eb4642804869285768f828 Mon Sep 17 00:00:00 2001 From: jm Date: Wed, 18 Jan 2012 10:53:13 +0100 Subject: [PATCH] Initial commit. Please see the README --- README | 43 ++++++++++++++++++ constants.py | 38 ++++++++++++++++ correlation_checker.py | 44 +++++++++++++++++++ exc.py | 5 +++ measurements.py | 74 +++++++++++++++++++++++++++++++ number.py | 100 ++++++++++++++++++++++++++++++++++++++++++ operators.py | 116 +++++++++++++++++++++++++++++++++++++++++++++++++ search.py | 84 +++++++++++++++++++++++++++++++++++ stanek.py | 99 +++++++++++++++++++++++++++++++++++++++++ stanek_data.py | 63 +++++++++++++++++++++++++++ 10 files changed, 666 insertions(+) create mode 100644 README create mode 100644 constants.py create mode 100644 correlation_checker.py create mode 100644 exc.py create mode 100644 measurements.py create mode 100644 number.py create mode 100644 operators.py create mode 100644 search.py create mode 100644 stanek.py create mode 100644 stanek_data.py diff --git a/README b/README new file mode 100644 index 0000000..464ea05 --- /dev/null +++ b/README @@ -0,0 +1,43 @@ +Golden Search +----------- + +The code implements simple algorithm for finding combinations of numbers, that are similar to some natural constants. + +E.g. you want to prove, that your house has been made by aliens. You measure size of door, size of house, size of few windows and put this in the program. Voilla. Ratio between door surface and chimney surface is Pi! Ratio between window size and house height is Euler's number. Etc... + + +HowTo: + +1) So you have some measurements, like in stanek_data.py +You should have as much measurements as possible, to have as high precision as possibe. +Also you should get the method precision (like half of the lowest unit on the scale you have on the measuring device) + +stanek.py +2) Now, you may use the Method Analyzer class, which reads in the measurements, and computes average deviation, for you to have at least some + estimate of each method's (+ measuring process) standart deviation. + (you want this, since if you measure twice and you have (by coincidence) [1000, 1000] im milimeters, this does not mean, the measured thing is + in fact 1000 milimeters long with sd=0...) + +3) Now you should define operators for each round. +With the current search algorithm, you may either define ending round, or define maximum +memory usage. If so, when the memory limit is reached, only one more round is made (last round does not take up memory, since we do not need to store +intermediate results). + +4) Define targets you want to reach, using the Correlation Checker + + + +ALGORITHM: +in search.py +the algorithm works in stages. Each stage (except the first one) combines the numbers from previous stages using the defined OPERATORS. +e.g. +OPS = { +, - } +N0 = { 11, 20 } // numbers from measurements +N1 = { 11 + 20, 11 - 20, 20 - 11 } +N2 = { 11 + (11 + 20), 11 + (11 - 20), 11 + (20 - 11), // 11 from N0 + N1 + 20 + (11 + 20), 20 + (11 - 20), 20 + (20 - 11), // 20 from N0 + N1 + (11 + 20) + (11 + 20), (11 + 20) + (11 - 20), (11 + 20) + (20 - 11) // (11 + 20) from N1 + N1 + ... + ... + ... + diff --git a/constants.py b/constants.py new file mode 100644 index 0000000..d5f642e --- /dev/null +++ b/constants.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- + +import math + +from number import Number + +def add_constant(n): + CONSTANTS.append(n) + +NAT_CONSTANTS = [ + Number(1.61803398874989, string='Golden Ratio'), + Number(1/1.61803398874989, string='Golden Ratio Inverted'), + Number(0.577215664901, string='Euler-Mascheroni'), + Number(26000, string='Sun-Milky way center (in light years)'), + Number(math.e, string='e'), + Number(math.pi, string='Pi'), + Number(math.sqrt(2), string=u'√2'), + Number(math.log(2), string=u'ln(2)'), + Number(147 * 10**9, units={'m':1}, string='Earth-Sun Perihelium distance'), + Number(152 * 10**9, units={'m':1}, string='Earth-Sun Aphelium distance'), + Number(384400 * 10**3, units={'m':1}, string='Earth-Moon distance'), + Number(40075017, units={'m':1}, string='Earth Circumference Eq'), + Number(40007860, units={'m':1}, string='Earth Circumference Pol'), + Number(6378100, units={'m':1}, string='Earth Radius Eq'), + Number(6356800, units={'m':1}, string='Earth Radius Pol'), + Number(2.42, string='Earth Land:Sea Ratio'), + #Number(299792458, units={'m':1, 's':-1}, string='Speed of Light') + ] + +CISLA = [ + Number(3.0/10, string='Cislo 3:10'), + Number(6.0/10, string='Cislo 6:10'), + Number(9.0/10, string='Cislo 9:10'), + Number(10**3, string='Cislo 10^3'), + Number(10**6, string='Cislo 10^6'), + Number(10**9, string='Cislo 10^9'), +] +CONSTANTS = NAT_CONSTANTS diff --git a/correlation_checker.py b/correlation_checker.py new file mode 100644 index 0000000..81a3624 --- /dev/null +++ b/correlation_checker.py @@ -0,0 +1,44 @@ +from exc import IncompatibleUnits + +def encode_utf8(st): + return unicode(st).encode('utf-8') + +def myprint(*args): + print encode_utf8(' '.join(map(unicode, args))) + +class CorrelationChecker: + def __init__(self, targets): + self.targets = targets + self.matchd = {} + + def checkCandidate(self, num, verbose=False, verbose_limit=0.1): + for tid, t in enumerate(self.targets): + try: + diff = t.getDifference(num) + diffsofar = self.matchd.get(tid, None) + + if diffsofar == None or diff < diffsofar[0]: + self.matchd[tid] = (diff, num) + if verbose and diff < verbose_limit: + self.printBestMatch(tid) + except IncompatibleUnits: + pass + except ZeroDivisionError: + pass + + def printBestMatches(self): + tids = [ t[0] for t in sorted(self.matchd.items(), key=lambda t:t[1]) ] + for tid in tids: + self.printBestMatch(tid) + + def printBestMatch(self, tid): + match = self.matchd.get(tid, None) + if match: + diff, num = match + t = self.targets[tid] + + print + print "------ %.3f%%; "%(100*(1-diff),), diff + myprint(t, "=", num) + myprint(t.headStr()) + myprint(num.headStr()) diff --git a/exc.py b/exc.py new file mode 100644 index 0000000..d7e5a51 --- /dev/null +++ b/exc.py @@ -0,0 +1,5 @@ +class MemExceeded(Exception): + pass + +class IncompatibleUnits(Exception): + pass diff --git a/measurements.py b/measurements.py new file mode 100644 index 0000000..68d19d9 --- /dev/null +++ b/measurements.py @@ -0,0 +1,74 @@ +import math + +from number import Number + +def avg(l): + assert len(l) + return sum(l) / float(len(l)) + +class MethodAnalyzer: + def __init__(self): + self.method_data = {} + self.method_avg_sdsq = {} + + def addMeasurement(self, m): + assert m.method + l = self.method_data.get(m.method, []) + l.append( m.computeStats() ) + self.method_data[m.method] = l + + def computeStats(self, verbose=False): + for method, l in self.method_data.iteritems(): + avg_sdsq = avg( [ t[2] for t in l ] ) + min_sdsq = min( t[2] for t in l ) + max_sdsq = max( t[2] for t in l ) + + self.method_avg_sdsq[method] = avg_sdsq + if verbose: + print method + print "min =", min_sdsq + print "avg =", avg_sdsq + print "max =", max_sdsq + +class M: + def __init__(self, name, ms, method=None, units=None, precise=False): + if units == None: + units = {'m':1} + + if precise: + assert len(ms) == 1 + + self.name = name + self.ms = map(float, ms) + #self.ms = ms + self.method = method + self.units = units + self.precise = precise + + def computeStats(self): + n = len(self.ms) + mean = avg(self.ms) + # delime takhle kvuli nevychylenymu odhadu + sdsq = sum((x - mean)**2 for x in self.ms) / ( (n - 1) if n > 1 else 1 ) + + return (n, mean, sdsq) + + def toNumber(self, method_resolution=None, method_analyzer=None): + if self.precise: + return Number(self.ms[0], units=self.units, string=self.name) + + n, mean, sdsq = self.computeStats() + if self.method: + if method_analyzer: + mavg = method_analyzer.method_avg_sdsq[self.method] + if sdsq < mavg: + sdsq = mavg + + # the more measurements, the better + sdsq /= n + + if method_resolution: + # add the resolution of the method + sdsq += method_resolution[self.method] ** 2 + + return Number(mean, sdsq=sdsq, units=self.units, string=self.name) diff --git a/number.py b/number.py new file mode 100644 index 0000000..22d28b1 --- /dev/null +++ b/number.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- + +import math + +from exc import IncompatibleUnits +import operators + +def units_op(u1, u2, op): + u = {} + for k in u1.viewkeys() | u2.viewkeys(): + v1, v2 = u1.get(k, 0), u2.get(k, 0) + v = op( v1, v2 ) + if v != 0: + u[k] = v + return u + +def units_diff(u1, u2): + return units_op(u1, u2, lambda x, y : x - y) + +def units_join(u1, u2): + return units_op(u1, u2, lambda x, y : x + y) + +class Number: + def __init__(self, value, sdsq=0, units=None, string='', parents=None): + if units == None: + units = {} + + self.value = float(value) + self.sdsq = sdsq + self.units = units + self.string = string + self.parents = parents + + def __add__(self, other): + return operators.Plus()((self, other)) + + def __sub__(self, other): + return operators.Minus()((self, other)) + + def __mul__(self, other): + return operators.Mult()((self, other)) + + def __rmul__(self, other): + assert isinstance(other, int) or isinstance(other, float) + + return operators.Times(other)((self,)) + + def __div__(self, other): + return operators.Div()((self, other)) + + def getDifference(self, other): + # TODO lip?? + if self.getUnits() != other.getUnits(): + raise IncompatibleUnits + + assert self.sdsq == 0 + + diff = abs(1.0 - other.getValue() / self.getValue()) + + op = other.getSdPerc() + if diff < op: + return op + + return diff + + def getValue(self): + return self.value + + def getSdSquare(self): + return self.sdsq + + def getSd(self): + return math.sqrt(self.getSdSquare()) + + def getSdPerc(self): + return self.getSd() / abs(self.getValue()) + + def getUnits(self): + return self.units + + def strUnits(self): + l=[] + for key in sorted(self.units.keys()): + l.append(key) + v = self.units[key] + if v != 1: + l.append( '^%d'%(v,)) + return ''.join(l) + + def longStr(self): + return self.headStr() + " = " + str(self) + + def headStr(self): + return u'%.4f ± %.4f %s'%(self.getValue(), self.getSd(), self.strUnits()) + + def __str__(self): + s = self.string + if self.parents: + s += "(%s)"%(', '.join(map(str, self.parents), ), ) + return s diff --git a/operators.py b/operators.py new file mode 100644 index 0000000..a28cb35 --- /dev/null +++ b/operators.py @@ -0,0 +1,116 @@ +from math import sqrt + +import number +from exc import IncompatibleUnits + + + +OPERATORS_ALL=[] + +OP_CLASSES=[] +OPERATORS_ARITHMETIC=[] + +def register_operator(cl): + OP_CLASSES.append(cl) + return cl + +class GenericOp(object): + def __init__(self, arity, commutativity, name): + self.arity = arity + self.commutativity = commutativity + self.name = name + + def __call__(self, args): + assert len(args) == self.getArity() + values = map(lambda n: n.getValue(), args) + sdsqs = map(lambda n: n.getSdSquare(), args) + units = map(lambda n: n.getUnits(), args) + + new_value, new_sdsq, new_units = self.computeNewNumber(values, sdsqs, units) + return number.Number(new_value, sdsq=new_sdsq, string=self.name, units=new_units, parents=args) + + def computeNewNumber(self, values, sdsqs, units): + raise NotImplementedError + + def getArity(self): + return self.arity + + def isCommutative(self): + return self.commutativity +# +# +# Arithmetic Operators +# + +@register_operator +class Plus(GenericOp): + def __init__(self): + super(Plus, self).__init__(2, True, "Plus") + + def computeNewNumber(self, (n1v, n2v), (n1sdsq, n2sdsq), (u1, u2)): + if u1 != u2: + raise IncompatibleUnits + new_value = n1v + n2v + new_sdsq = n1sdsq + n2sdsq + return new_value, new_sdsq, u1 + +@register_operator +class Minus(GenericOp): + def __init__(self): + super(Minus, self).__init__(2, False, "Minus") + + def computeNewNumber(self, (n1v, n2v), (n1sdsq, n2sdsq), (u1, u2)): + if u1 != u2: + raise IncompatibleUnits + new_value = n1v - n2v + new_sdsq = n1sdsq + n2sdsq + return new_value, new_sdsq, u1 + +@register_operator +class Mult(GenericOp): + def __init__(self): + super(Mult, self).__init__(2, True, "Mult") + + def computeNewNumber(self, (n1v, n2v), (n1sdsq, n2sdsq), (u1, u2)): + new_value = n1v * n2v + new_sdsq = n1v**2 * n2sdsq + n2v**2 * n1sdsq + return new_value, new_sdsq, number.units_join(u1, u2) + +@register_operator +class Div(GenericOp): + def __init__(self): + super(Div, self).__init__(2, False, "Div") + + def computeNewNumber(self, (n1v, n2v), (n1sdsq, n2sdsq), (u1, u2)): + new_value = n1v / n2v + new_sdsq = n2v**(-2) * n1sdsq + n1v**2 * n2v**(-4) * n2sdsq + return new_value, new_sdsq, number.units_diff(u1, u2) + +# +# Numeric operators +# +# e.g. op(number) -> 2 * number +# + +class Times(GenericOp): + def __init__(self, num): + super(Times, self).__init__(1, True, str(num)) + self.num=num + + def computeNewNumber(self, (n1v,), (n1sdsq, ), (u1,)): + new_value = self.num * n1v + new_sdsq = self.num**2 * n1sdsq + return new_value, new_sdsq, u1 + +# +# Unit Conversions +# + +# TODO TODO TODO + +# +# All together +# + +OPERATORS_ARITHMETIC = map(lambda x: x() , OP_CLASSES) + diff --git a/search.py b/search.py new file mode 100644 index 0000000..a411569 --- /dev/null +++ b/search.py @@ -0,0 +1,84 @@ +from itertools import product, chain +import resource + +from exc import IncompatibleUnits, MemExceeded + +def search(base_numbers, ops, cc, max_round=None, mem_limit=2*1024**2): + # Round 0 + bf = [base_numbers] + # check if measurements do not mean anything directly + for m in base_numbers: + cc.checkCandidate(m, verbose=True) + + elem_count = 0 + final_round = max_round == 0 + round_count = 1 + try: + while not final_round: + final_round = max_round == round_count + + print + print "ROUND %d started"%(round_count,) + print + ro = [] + try: + for op in ops.get(round_count, ops.get('default', None)): + ar = op.getArity() + if ar >= 3: + raise NotImplementedError + + last = bf[-1] + sofar = list(chain(*bf)) + + pr = [last] + [sofar]*(ar-1) + i1 = product(*pr) + i2 = () + + if not op.isCommutative() and ar == 2: + i2 = product(chain(*(bf[:-1])), last) + + for t in chain(i1, i2): + if elem_count % 10000 == 0 and not final_round: + mem_used = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss + if mem_used > mem_limit: + print + print "MEM EXCEEDED" + print "Limit %.1f MB, Used %.1f MB"%(mem_limit / 1024.0, mem_used / 1024.0) + print "FINAL ROUND FOLLOWS" + raise MemExceeded + elem_count += 1 + + try: + res = op(t) + # we do not need to add if we are in the final round + # we save a lot of memory this way (and may even be able to compute one more round therefore + if not final_round: + ro.append(res) + # register the result to see improving matches incrementaly + cc.checkCandidate(res, verbose=True) + except IncompatibleUnits: + pass + except ZeroDivisionError: + pass + + except MemExceeded: + # set the limit so that we end in the next iteration + max_round = round_count + 1 + + bf.append(ro) + print + print "ROUND %d ended, searched %d combinations in total"%(round_count, elem_count) + print + + round_count += 1 + except KeyboardInterrupt: + pass + + print + print + print "END" + print + print "Best matches:" + print + # show what we have found + cc.printBestMatches() diff --git a/stanek.py b/stanek.py new file mode 100644 index 0000000..cefdd3c --- /dev/null +++ b/stanek.py @@ -0,0 +1,99 @@ +from correlation_checker import CorrelationChecker +from measurements import MethodAnalyzer +import operators +from constants import CONSTANTS +from search import search + +from stanek_data import MEASURED_DATA, PRECISE_DATA, method_resolution + +# +# Data Preparation +# + +# compute avg sdsq for each method +ma = MethodAnalyzer() +for m in MEASURED_DATA: + ma.addMeasurement(m) +ma.computeStats(verbose=True) + +# compute list of Numbers from measured data +# this takes into account: +# 1) method precision +# 2) number of samples + +# Convert measurements to Numbers +MEASUREMENTS = map(lambda m: m.toNumber(method_resolution, ma), MEASURED_DATA ) # + PRECISE_DATA) +# Now I add some derived numbers, (like rectangle surface (a*b), once you have measured `a` and `b`. +md = dict( (n.string, n) for n in MEASUREMENTS ) + +obvod = 2 * md['Predek_Sirka'] + 2 * md['Bok_Sirka'] +obvod.parents = None +obvod.string = 'Obvod_Stanku' +MEASUREMENTS.append(obvod) + +for base in [ "Dvere", "Menu_Deska", "Stitek", "Klicova_Dirka", "Bok_O_2", "Predek_O_1", "Vnitrek_Okynka" ]: + obsah = md[ base + "_Vyska" ] * md[ base + "_Sirka" ] + obsah.parents = None + obsah.string = base + '_Obsah' + print md[ base + "_Vyska" ].longStr() + print md[ base + "_Sirka" ].longStr() + print obsah.longStr() + MEASUREMENTS.append(obsah) + +if __name__ == "__main__": + # + # Define searching operators + # + + + MAX_ROUND = 1 + OPS_1 = [ ] + OPS_2 = [ ] + OPS_3 = [ ] + OPS_1.append(operators.Div()) + + #OPS_1.append(operators.Plus()) + #OPS_1.append(operators.Minus()) + #OPS_1.extend(operators.OPERATORS_ARITHMETIC) + for num in [1]: #, 10**10, 10**9, 10**6, 10**3]:#, 1/3.0, 1/6.0, 1/9.0]: + OPS_2.append(operators.Times(num)) + + for num in [10**10, 10**9, 10**6, 10**3]:#, 1/3.0, 1/6.0, 1/9.0]: + OPS_2.append(operators.Times(num)) + #OPS_2.extend(reversed(operators.OPERATORS_ARITHMETIC)) + OPS_2.append( operators.Div() ) + + OPS = dict([ + (1, OPS_1), + (2, OPS_2), + ('default', OPS_3) + ]) + + """ + OPS_1 = [ ] + #OPS_1.append(operators.Div()) + OPS_1.append(operators.Plus()) + OPS_1.append(operators.Minus()) + #OPS_1.extend(perators.OPERATORS_ARITHMETIC) + for num in [10**9, 10**6, 10**3, 1/3.0, 1/6.0, 1/9.0]: + OPS_1.append(operators.Times(num)) + #OPS_2 = list(reversed(operators.OPERATORS_ARITHMETIC)) + OPS_2 = [ operators.Div() ] + + OPS = dict([ + (1, OPS_1), + ('default', OPS_2) + ]) + """ + # + # Search Targets + # + + # we want to look for constants + TARGETS = CorrelationChecker(CONSTANTS) + + # + # Run the search! + # + + search( MEASUREMENTS, OPS, TARGETS, MAX_ROUND, mem_limit=1*1024**2) diff --git a/stanek_data.py b/stanek_data.py new file mode 100644 index 0000000..93e10d6 --- /dev/null +++ b/stanek_data.py @@ -0,0 +1,63 @@ +from measurements import M + +# maximal possible precision of the measuring device +method_resolution = dict([ + ('Laser', 0.0005), + ('Metr', 0.0005), + ('Suplera', 0.000025) + ]) + +# default units are meters +# else +# M( 'Speed_of_light', [ 300000000, 299000000 ], units={'m':1, 's':-1}, method='Laser') + + +MEASURED_DATA = [ +#( '', [], method=''), +M( 'Predek_Sirka', [ 3, 2.99, 3.01 ], method='Laser'), +M( 'Predek_Vyska_Zeme_Deska', [ 0.897, 0.891 ], method='Laser'), +M( 'Vyska_k_Desce', [ 1.065, 1.065, 1.066, 1.066 ], method='Laser'), +M( 'Bok_Sirka', [2.0, 1.998], method='Laser'), +M( 'Dvere_Vyska', [1.931, 1.931], method='Laser' ), +M( 'Dvere_Sirka', [0.728, 0.727], method='Metr' ), +M( 'Okap_Prumer', [0.05315, 0.0533], method='Suplera'), +M( 'Deska_Sirka', [0.0302, 0.03], method='Suplera'), +M( 'Stupinek_Sirka', [0.906, 0.9], method='Metr'), +M( 'Stupinek_Vyska', [0.285, 0.28], method='Metr'), +M( 'Klika_Prumer', [0.04545, 0.04555], method='Suplera'), +M( 'Klicova_Dirka_Sirka', [0.00275, 0.0027], method='Suplera'), +M( 'Klicova_Dirka_Vyska', [0.01145, 0.01145], method='Suplera'), +M( 'Stitek_Sirka', [0.0398, 0.03965, 0.0397, 0.0397], method='Suplera'), +M( 'Stitek_Vyska', [0.233, 0.234, 0.233, 0.233], method='Metr'), +M( 'Menu_Deska_Sirka', [2.855, 2.866], method='Laser'), +M( 'Menu_Deska_Vyska', [0.273, 0.275], method='Laser'), +M( 'Menu_Deska_Obruben', [0.0384, 0.0382], method='Suplera'), +M( 'Od_Dveri_MFF', [6.753], method='Laser'), +M( 'Predek_O_1_Sirka', [0.249, 0.250, 0.251, 0.250], method='Laser'), +M( 'Predek_O_1_Vyska', [0.250, 0.248, 0.257, 0.253], method='Laser'), +#M( 'Predek_O_2_Sirka', [0.784, 0.783, 0.783, 0.783], method='Laser'), +#M( 'Predek_O_2_Vyska', [0.247, 0.247, 0.259, 0.253], method='Laser'), +M( 'Predek_O_3_Sirka', [0.384, 0.380, 0.378, 0.380], method='Laser'), +M( 'Predek_O_3_Vyska', [0.781, 0.781, 0.780, 0.781], method='Laser'), +#M( 'Predek_O_4_Sirka', [0.379, 0.377, 0.381, 0.380], method='Laser'), +#M( 'Predek_O_4_Vyska', [0.247, 0.249, 0.247, 0.247], method='Laser'), +M( 'Vnitrek_Okynka_Sirka', [0.3, 0.299], method='Laser'), +M( 'Vnitrek_Okynka_Vyska', [0.161, 0.161], method='Metr'), +M( 'Bok_O_1_Vyska', [1.377, 1.379, 1.382, 1.383], method='Laser'), +#M( 'Bok_O_1_Sirka', [0.432, 0.43, 0.428, 0.426], method='Laser'), +M( 'Bok_O_2_Sirka', [0.434, 0.433, 0.435, 0.435], method='Laser'), +M( 'Bok_O_2_Vyska', [0.382, 0.381, 0.380, 0.380], method='Laser') +] + +PRECISE_DATA = [ +M( 'Klobasa', [50], precise=True, units={}), +M( 'Maxi_Hot_Dog', [60], precise=True, units={}), +M( 'Hamburger', [30], precise=True, units={}), +M( 'Cheeseburger', [40], precise=True, units={}), +M( 'Hranolky', [35], precise=True, units={}), +M( 'Zero', [0], precise=True, units={}) +# duplicitni: +#( 'Smazeny_Syr', [40], precise=True, units={}), +#( 'Langos', [50], precise=True, units={}), +#( 'Bramborak', [40], precise=True, units={}) +] -- 2.11.4.GIT