{
 "metadata": {
  "name": "",
  "signature": "sha256:fc8b243a328d7e89e52682288ec6bd5849ce746bc700de2351976667b6da97fe"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Survival Driven Development\n",
      "===========================\n",
      "\n",
      "Survival Driven Development (SDD) is the newest software development fad.  In this development framework, you specify what the software is supposed to do, then randomly generate source code to fulfill these requirements.  Most of these attempts will fail, but hopefully one will succeed.\n",
      "\n",
      "Your task is to use SDD to make a function to approximate `x**2 + x`.\n",
      "\n",
      "Hint 1: Randomly generate lambda functions using a restricted vocabulary of source fragments.<br>\n",
      "`vocab = ['x', 'x', ' ', '+', '-', '*', '/', '1', '2', '3']`\n",
      "\n",
      "Hint 2: Only evaluate `x` at a small-ish number of values and save the difference between those answers and the true value of `x**2 + x`.\n",
      "\n",
      "Hint 3: SDD is error prone.  Be sure to catch your errors!"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy\n",
      "import random"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 1
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "vocab = ['x', 'x', ' ', '+', '-', '*', '/', '1', '2', '3']\n",
      "n_chars = 10  # how many characters to try\n",
      "n_tries = 10000  # how many trials before we give up\n",
      "\n",
      "x = numpy.arange(-3, 3, 0.4)\n",
      "known_y = x**2 + x"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def build_exp(voc, n_chars):\n",
      "    \"\"\"Build a (str) expression of n_chars from the vocabular list voc\"\"\"\n",
      "    exp = ''\n",
      "    for n in range(n_chars):\n",
      "        i = random.randint(0, len(voc)-1)\n",
      "        exp += voc[i]\n",
      "    return exp"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 3
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def get_err(y, Y):\n",
      "    \"\"\"Compute the aggregate error between y and Y\"\"\"\n",
      "    sq = numpy.abs(y - Y)\n",
      "    return numpy.sum(sq)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 4
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "err = None\n",
      "exp = None\n",
      "exp_log = []\n",
      "TOL = 1.0e-6\n",
      "\n",
      "for n in xrange(n_tries):\n",
      "    exp = build_exp(vocab, n_chars)\n",
      "\n",
      "    # build a string to define a lambda function\n",
      "    temp = 'f = lambda x: ' + exp\n",
      "    temp += '\\n'\n",
      "    # evaluate the new function with argument x, store the result as rez\n",
      "    temp += 'rez = f(x)'\n",
      "\n",
      "    try:\n",
      "        exec(temp)\n",
      "        err = get_err(known_y, rez)\n",
      "    except Exception as e:\n",
      "        # failed to compute function or error. move on to test try\n",
      "        continue\n",
      "\n",
      "    exp_log.append((err, exp))\n",
      "\n",
      "    if err < TOL:\n",
      "        print 'success'\n",
      "        break\n",
      "else:\n",
      "    print 'failure'\n",
      "\n",
      "exp_log = sorted(exp_log)\n",
      "best_err, best_exp = exp_log[0]\n",
      "\n",
      "print 'best error', err\n",
      "print 'best exp  ', best_exp"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "failure\n",
        "best error 50.4\n",
        "best exp    1+x+x * x\n"
       ]
      }
     ],
     "prompt_number": 8
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "<p>\n",
      "If you'd rather, you can do this all within one function."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "from random import randint\n",
      "\n",
      "def generate_function(X,Y, voc, max_try=1000000, max_chars=10):\n",
      "    ''' find the analytic form that describes Y on X '''\n",
      "    tries = []\n",
      "    for n in xrange(max_try):\n",
      "        ## make some random function using the vocabulary\n",
      "        thefunc = \"\".join([voc[randint(0,len(voc)-1)] for x in range(randint(1,max_chars))])\n",
      "        ## construct two python statement, declaring the lambda function and evaluating it at X\n",
      "        mylam = \"y = lambda x: \" + thefunc + \"\\n\"\n",
      "        mylam += \"rez = y(X)\"\n",
      "        try:\n",
      "            ## this may be volitile so be warned!\n",
      "            ## Couch everything in error statements, and\n",
      "            ##  simply throw away functions that aren't reasonable\n",
      "            exec(mylam)\n",
      "        except:\n",
      "            continue\n",
      "        try: \n",
      "            tries.append( ( (abs(rez - Y).sum()) ,thefunc))\n",
      "            if (abs(rez - Y)).sum() < 0.0001:\n",
      "                ## we got something really close\n",
      "                break\n",
      "        except:\n",
      "            pass\n",
      "        del rez\n",
      "        del y\n",
      "        \n",
      "    ### numpy arrays handle NaN and INF gracefully, so we put\n",
      "    ### answer into an array before sorting\n",
      "    a = np.array(tries,dtype=[('rez','f'),(\"func\",'|S10')])\n",
      "    a.sort()\n",
      "    \n",
      "    if a[0][\"rez\"] < 0.001:\n",
      "        print \"took us ntries = {0}, but we eventually found that '{1}' is functionally equivalent to f(X)\".format(n,a[0][\"func\"])\n",
      "    else:\n",
      "        print \"after ntries = {0}, we found that '{1}' is close to f(x) (metric = {2})\".format(n,a[0][\"func\"],a[0][\"rez\"])\n",
      "    \n",
      "    return a[0]\n",
      "    \n",
      "\n",
      "voc = [\"x\",\"x\",\" \",\"+\",\"-\",\"*\",\"/\",\"1\",\"2\",\"3\"]\n",
      "\n",
      "x_array       = np.arange(-3,3,0.4)\n",
      "real_function = x_array**2 + x_array\n",
      "generate_function(x_array, real_function, voc, max_try=10000)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [
      {
       "output_type": "stream",
       "stream": "stdout",
       "text": [
        "after ntries = 9999, we found that '++x*x' is close to f(x) (metric = 22.6000003815)\n"
       ]
      },
      {
       "metadata": {},
       "output_type": "pyout",
       "prompt_number": 17,
       "text": [
        "(22.600000381469727, '++x*x')"
       ]
      }
     ],
     "prompt_number": 17
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}