Real Time Audio Stuff (FFT/LPC Envelope from Audio Device)

Experimenting with matplotlib, audiolazy, and pyaudio for LPC spectrum estimation. This could easily be spun into formant estimation and other fun stuff. Here are some of the gory details:

  • LPC method (lpc.lpc.autocor): employing autocorrelation and the Levinson-Durbin recursion algorithm. This requires slightly fewer computations than the covariance method. 
  • LPC gain (g): to "align" the LPC envelope with the FFT, I approximate the gain parameter by multiplying the windowed autocorrelation of the input stimulus by the LPC coefficients themselves. I then sum the products and take the square root of the sum. This is an assumption-laden process, but it appears to work well. If you're a glutton for punishment, you can read more about these assumptions in Section 8.2 of Rabiner and Schafer's "Digital Processing of Speech Signals".
  • LPC order (p): 12

If your system has trouble running this code in real time, try setting either timeDomain or freqDomain to False under "GUI parameters" to limit computational burden. Alternatively, you can tweak the CHUNK, NFFT, and RATE parameters. For reference, it runs fine on my environment with the following specs:

numpy 1.9.2
pyaudio 0.2.7
audiolazy 0.5
matplotlib 1.4.3

compiler : GCC 4.2.1 (Apple Inc. build 5577)
system : Darwin
release: 15.2.0
machine: x86_64
processor: i386
CPU cores: 4
interpreter: 64bit

 

Code

from matplotlib.animation import FuncAnimation
from audiolazy import lazy_lpc as lpc
import matplotlib.pyplot as plt
import numpy as np
import pyaudio
import sys

#############################
# GUI parameters
#############################
headless = False     # True: skip all plotting and just pull audio (see bottom of file)
timeDomain = True    # show the scrolling waveform panel
freqDomain = True    # show the FFT spectrum panel
lpcOverlay = True    # overlay the LPC envelope on the FFT panel

#############################
# Stream Parameters
#############################
DEVICE = 0                  # pyaudio input-device index
CHUNK = 1024*4              # samples read per frame; larger = fewer overflows, more latency
WINDOW = np.hamming(CHUNK)  # analysis window applied before spectral estimation
FORMAT = pyaudio.paInt16    # 16-bit signed integer samples
CHANNELS = 1                # mono capture
RATE = 44100                # sample rate in Hz

#############################
# Spectral parameters
#############################
ORDER = 12       # LPC model order (p)
NFFT = CHUNK*2   # FFT length (zero-padded to twice the chunk size)
# Rough per-frame operation counts for the autocorrelation LPC method:
# correlation: 12*4*1024 = 49152
# matrix solution: 12*12 = 144
# matrix solution cov: 12*12*12 = 1728


#############################
# Feature extraction
#############################
def est_predictor_gain(x, a):
    """Approximate the LPC gain parameter g.

    Multiplies the autocorrelation of the (windowed) input frame by the
    LPC coefficients, sums the products, and takes the square root of the
    sum (see Rabiner & Schafer, "Digital Processing of Speech Signals",
    Section 8.2, for the assumptions behind this).

    Parameters
    ----------
    x : array_like
        Windowed input frame.
    a : sequence of float
        LPC polynomial coefficients (length = order + 1).

    Returns
    -------
    float
        Estimated gain g.
    """
    a = np.asarray(a)
    cor = np.correlate(x, x, mode='full')
    # mode='full' returns 2*len(x)-1 lags with the zero lag at the midpoint.
    # Floor division keeps the index an int on Python 3 as well, and the lag
    # count is taken from len(a) instead of the module-level ORDER constant
    # so the function works for any model order.
    mid = len(cor) // 2
    rr = cor[mid: mid + len(a)]
    g = np.sqrt(np.sum(a * rr))
    return g


def lpc_spectrum(data):
    """Return the LPC spectral envelope in dB for the first NFFT/2 bins."""
    # Fit an order-ORDER all-pole model via autocorrelation/Levinson-Durbin.
    model = lpc.lpc.autocor(data, ORDER)
    coeffs = model.numerator
    gain = est_predictor_gain(data, coeffs)
    # Scale the coefficients by 1/g, evaluate A(e^jw) on an NFFT-point grid,
    # and negate the log so 1/|A|^2 becomes the envelope.
    normalized = [coeff / gain for coeff in coeffs]
    response = np.fft.fft(normalized, NFFT)
    envelope_db = -20 * np.log10(np.abs(response) ** 2)
    return envelope_db[:NFFT // 2]


def spectral_estimate(data):
    """Return the FFT power spectrum in dB for the first NFFT/2 bins."""
    bins = np.fft.fft(data, NFFT)
    power_db = 20 * np.log10(np.abs(bins) ** 2)
    return power_db[:NFFT // 2]

##########################
# Create audio stream
##########################
p = pyaudio.PyAudio()
print "Checking compatability with input parameters:"
print "\tAudio Device:", DEVICE
print "\tRate:", RATE
print "\tChannels:", CHANNELS
print "\tFormat:", FORMAT

isSupported = p.is_format_supported(input_format=FORMAT,
                                    input_channels=CHANNELS,
                                    rate=RATE,
                                    input_device=DEVICE)
if isSupported:
    print "\nThese settings are supported on device %i!\n" % (DEVICE)
else:
    sys.exit("\nUh oh, these settings aren't",
             " supported on device %i.\n" % (DEVICE))

stream = p.open(format=FORMAT,
                channels=CHANNELS,
                rate=RATE,
                input=True,
                frames_per_buffer=CHUNK)

##########################
# Fetch incoming data
##########################
# Single-element list so the update() closure can mutate the count in place
# (Python 2 has no 'nonlocal').
errorCount = [0]
if not headless:
    # Rolling buffer of the last two chunks of samples for the waveform plot.
    oldy = range(2*CHUNK)

    ######################################################
    # Create new Figure and an Axes to occupy the figure.
    ######################################################
    fig = plt.figure(facecolor='white')  # optional arg: figsize=(x, y)
    # Number of enabled panels (1 or 2); used to size/position the axes.
    nAx = sum([1 for xx in [timeDomain, freqDomain] if xx])

    if timeDomain:
        # Axes rectangle is [left, bottom, width, height] in figure fractions;
        # the formula stacks this panel above the frequency panel when both
        # are enabled, or lets it fill the figure when it is alone.
        axTime = fig.add_axes([.1, .5*nAx - .5 + .1/nAx, .8, .4*(3-nAx)])
        axTime.set_axis_bgcolor('#c6dbef')
        plt.grid()
        # Placeholder y-data; update() replaces it every frame.
        lineTime, = axTime.plot(range(2*CHUNK), range(2*CHUNK), c='#08519c')
        plt.ylim([-4000, 4000])
        plt.xlim([0, 2*CHUNK])
        plt.title('Real Time Audio (milliseconds)')
        # Relabel the sample-index axis in milliseconds (1000 * samples / RATE).
        axTime.set_xticks(np.linspace(0, 2*CHUNK, 5))
        labels = ['%.1f' % (xx) for xx in np.linspace(0, 1000*2*CHUNK/RATE, 5)]
        axTime.set_xticklabels(labels, rotation=0, verticalalignment='top')
        plt.ylabel('Amplitude')

    if freqDomain:
        axFreq = fig.add_axes([.1, .1/nAx, .8, 0.4*(3-nAx)])
        axFreq.set_axis_bgcolor('#c6dbef')
        plt.grid()
        # One line for the raw FFT, and optionally a dashed one for the
        # LPC envelope overlay.
        lineFreq, = axFreq.plot(range(NFFT/2), [0]*(NFFT/2),
                                c='#6baed6', label='FFT')
        if lpcOverlay:
            lineLPC, = axFreq.plot(range(NFFT/2), [0]*(NFFT/2),
                                   '--', c='#08519c', label='LPC Envelope')
        plt.ylim([-50, 300])
        plt.xlim([0, NFFT/2])
        plt.title('Real Time Audio (Hz)')
        plt.legend()
        # Relabel the bin axis in Hz: bins 0..NFFT/2 span 0..RATE/2 (Nyquist).
        axFreq.set_xticks(np.linspace(0, NFFT/2, 5))
        labels = ['%.1f' % (xx) for xx in np.linspace(0, RATE/2, 5)]
        axFreq.set_xticklabels(labels, rotation=0, verticalalignment='top')
        plt.ylabel('Amplitude [dB]')

    ######################################################
    # Define function to update figure for each iteration.
    ######################################################
    def update(frame_number):
        global oldy
        objects_to_return = []
        try:
            incoming = np.fromstring(stream.read(CHUNK), 'Int16')
            if timeDomain:
                newy = oldy[CHUNK:]
                newy.extend(incoming)
                lineTime.set_ydata(newy)
                objects_to_return.append(lineTime)
                oldy = newy
            if freqDomain:
                windowed = incoming*WINDOW
                S = spectral_estimate(windowed)
                lineFreq.set_ydata(S)
                if lpcOverlay:
                    S_lpc = lpc_spectrum(windowed)
                    lineLPC.set_ydata(S_lpc)
                objects_to_return.append(lineFreq)
                objects_to_return.append(lineLPC)
        except IOError as e:
            print str(e),
            errorCount[0] += 1
            if errorCount[0] > 5:
                print "This is happening often.",
                print " (Try increasing size of \'CHUNK\'.)"
            else:
                print
        return objects_to_return

    animation = FuncAnimation(fig, update, interval=10)
    plt.show()

else:
    while True:
        incoming = np.fromstring(stream.read(CHUNK), 'Int16')
        S = spectral_estimate(windowed)
        #  Do some processing stuff here!


Monitor Your Drinking Habits! (with cv2)

Use Canny edge detection and Hough transformation to extract and count wine cork shapes of varying diameter/color/texture.

Inputs

Output

Code

# Will Forfang
# 05/11/2015

print "Loading tools...",
import cv2, pdb
import numpy as np
import os
print "Done."

if 'results' not in os.listdir('.'):
	os.mkdir('results')
	print "Generating results directory."

# Inputs
print "Opening image...",
rimg = cv2.imread('DSC_1531.JPG',0)
img = cv2.imread('DSC_1531_cropped.JPG',0)

print "Done."
print "Applying blur...",
cimg = cv2.medianBlur(rimg,5)
cimg = cv2.cvtColor(rimg,cv2.COLOR_GRAY2BGR)
print "Done."

print "Finding corks...",
corks = cv2.HoughCircles(img,cv2.HOUGH_GRADIENT,1,120,
                            param1=20,param2=25,minRadius=60,maxRadius=80)
corks = np.uint16(np.around(corks))
print "Found",len(corks[0,:]),"corks."

print "Labelling corks...",
for i in corks[0,:]:
    # draw the outer circle
    cv2.circle(cimg,(i[0],i[1]),i[2],(0,255,0),10)
    # draw the center of the circle
    cv2.circle(cimg,(i[0],i[1]),2,(0,0,255),6)
print "Done."

# text position:
anchorX = int(.65*len(cimg[0]))
anchorY = int(.25*len(cimg))
cv2.putText(cimg,'detected '+str(len(corks[0,:]))+' corks', (anchorX,anchorY), cv2.FONT_HERSHEY_SIMPLEX, 5, (0,240,0),10)
cv2.imwrite('results/detected_'+str(len(corks[0,:]))+'_corks.png',cimg)

Vacation Planning (with BeautifulSoup and NLTK)

Mine/scrape online reviews for a given hotel, then generate word clouds from reviewers' most commonly used adjectives. 

This weekend I was tasked with choosing a hotel for an upcoming vacation. TripAdvisor contains dozens (if not hundreds) of user reviews and photographs for any given hotel. The user-interface is easy enough to navigate and the "rating" system is fairly informative. However, when the number of reviews for a given hotel reaches statistical significance, I think TripAdvisor would benefit by providing more sophisticated lexical analyses.

The code below will use web scraping techniques (via BeautifulSoup) to harvest user reviews from TripAdvisor; it will then filter out adjectives with the NLTK wordnet synsets ("synonym set") and generate word clouds. Enjoy!

 

Step 1: Grab a URL

By clicking through a few pages of reviews, it's clear that the "or10" string in the URL is an iterator, which tracks page count as you navigate through the reviews. You'll want to grab a URL which contains this iterator. If you don't see it, click to the next page of reviews and it should appear.

Step 2: Replace the iterator with "ASDF", then add it to the "urls" dictionary in the code below

from bs4 import BeautifulSoup
from urllib2 import urlopen
import unicodedata
import string

# Maps a short hotel label to its TripAdvisor review URL, with the page
# iterator replaced by the placeholder 'ASDF'.  The original paradise-island
# entry was wrapped in doubled quotes ("" ... ""), which is a SyntaxError.
urls = {}
urls['paradise island'] = "http://www.tripadvisor.com/Hotel_Review-g2104579-d316954-Reviews-orASDF0-Paradise_Island_Resort_Spa-Lankanfinolhu_Island.html#REVIEWS"
urls['parisian'] = "http://www.tripadvisor.com/Hotel_Review-g34439-d267917-Reviews-orASDF0-Parisian_Hotel_Suites-Miami_Beach_Florida.html#REVIEWS"
hotels = urls.keys()

class Review_object():

    ''' Lightweight container for one scraped review.
        Currently holds just the review text and a
        running index; more attributes may be added
        as the scraper grows. '''

    def __init__(self, content, idx):
        self.idx = idx
        self.content = content


def make_soup(url):
    ''' Fetch the page at url and return it parsed by BeautifulSoup. '''
    html = urlopen(url).read()
    return BeautifulSoup(html)


def remove_punctuation(input_string):

    ''' Lowercase input_string, keeping only ASCII letters and spaces.

    Tabs, newlines, and hyphens become spaces; all other punctuation is
    dropped; accented characters are decomposed (NFKD) and reduced to their
    ASCII base character.

    input_string may also be a BeautifulSoup tag-like object, in which case
    its .contents are joined into text first. '''

    exclude = set(string.punctuation)
    try:
        s = input_string.replace('\t', ' ').replace('\n', ' ').replace('-', ' ')
    except (AttributeError, TypeError):
        # bs4 Tag objects have no str-style .replace(); flatten their
        # contents first.  (The original used a bare except, which would
        # also have hidden unrelated bugs.)
        s = ''.join(xx for xx in input_string.contents)
        s = s.replace('\t', ' ').replace('\n', ' ').replace('-', ' ')

    s = ''.join(ch for ch in s if ch not in exclude)
    # Decompose accents and drop anything outside ASCII.  Decoding back to
    # text keeps the final character filter working on Python 3 as well,
    # where bytes iterate as ints.
    s = unicodedata.normalize('NFKD', s).encode('ascii', 'ignore').decode('ascii')
    s = ''.join(ch for ch in s if ch.isalpha() or ch == ' ')
    return s.lower()


def dump_data(data, hotel_name):
    ''' Save one hotel's reviews to dump_<hotel_name>.csv.

    The first line is the source URL (looked up in the module-level urls
    dict); each following line is "<index>,<cleaned review text>". '''
    url = urls[hotel_name]
    # 'with' guarantees the handle is closed even if cleaning a review
    # raises; the original leaked the handle on error.
    with open('dump_' + hotel_name + '.csv', 'w') as outfile:
        outfile.write(url + '\n')
        for each_object in data:
            content = remove_punctuation(each_object.content)
            outfile.write(str(each_object.idx) + ',' + content + '\n')


# Scrape every hotel's reviews page by page, collecting Review_objects.
data_dictionary = {}
for each_hotel in hotels:
    global_index = 0
    print "\nScraping",each_hotel,"reviews."
    # first, let's see how many pages of reviews exist by reading the
    # paginator links ('data-page-number' attributes) on page 0
    soup = make_soup(urls[each_hotel].replace('ASDF','0'))
    page_links = [xx.attrs for xx in soup.find_all('a') if 'data-page-number' in xx.attrs.keys()]
    pages = [int(xx['data-page-number']) for xx in page_links]
    print "Tripadvisor contains",max(pages),"pages of reviews for this hotel."

    # now we'll iterate through each page
    # (the URL placeholder 'ASDF' is followed by a literal '0', so page ii
    # maps to TripAdvisor's 'or<ii>0' offset -- i.e. steps of 10 reviews)
    data = []
    for ii in range(max(pages)+1):
        print ii, 
        url = urls[each_hotel].replace('ASDF', str(ii))
        soup = make_soup(url)
        # find all review titles
        titles = [xx.contents[0] for xx in
                  soup.find_all("span", class_="noQuotes")]
        # find all review entries
        pentries = [xx.contents[0] for xx in
                    soup.find_all("p", class_="partial_entry")]
        # add to dataset; titles and entries share one running index so every
        # scraped text chunk gets a unique id in the dump file
        for eachtitle in titles:
            data.append(Review_object(eachtitle, global_index))
            global_index += 1
        for eachentry in pentries:
            if len(eachentry) > 2:
                data.append(Review_object(eachentry, global_index))
                global_index += 1

    print "\nFinished analyzing " + each_hotel
    data_dictionary[each_hotel] = data
    print "Saving data to dump_%s.csv." % (each_hotel)
    dump_data(data, each_hotel)

from nltk.corpus import wordnet as wn
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

datafiles = ['dump_'+xx+'.csv' for xx in hotels]
excludedwords = ['room','hotel','motel']
wordcount = pd.DataFrame()

for eachfile in datafiles:
    infile = open(eachfile,'r')
    words = []
    # Let's clean the data a bit by removing numbers
    for eachline in infile:
        linewords = [xx.lower() for xx in eachline.split(' ')]
        for eachword in linewords:
            words.append(''.join(ch for ch in eachword if ch.isalpha()))
    words = [xx for xx in words if xx not in excludedwords]
    words_unique = np.unique(words)

    
    # Now we'll filter by parts of speech
    adjectives = []
    for eachword in words_unique:
      ss = wn.synsets(eachword)
      if len(ss) > 0:
        pos = [xx.pos() for xx in ss]
        if 's' in pos:
          adjectives.append(eachword)

    # Generate some word clouds
    all_adjectives = [xx for xx in words if xx in adjectives]

    wc = WordCloud(background_color="white", margin=5, width=600, height=300)
    wc2 = WordCloud(background_color="white", margin=5, width=600, height=300)
    if 'isian' in eachfile:
        wc2 = WordCloud(background_color="black", margin=5, width=600, height=300)
    wordcloud = wc.generate(' '.join(words))
    adjectivecloud = wc2.generate(' '.join(all_adjectives))

    # Print top ten
    print eachfile
    wordcount['all_' + eachfile] = [xx[0] for xx in wordcloud.words_[0:10]]
    wordcount['adj_' + eachfile] = [xx[0] for xx in adjectivecloud.words_[0:10]]
    
    plt.imshow(wordcloud)
    plt.title(eachfile[0:-4].replace('dump_','') + ' all')
    plt.axis("off")
    plt.savefig(eachfile[0:-4] + '_all.png',dpi=600)
    plt.clf()

    plt.imshow(adjectivecloud)
    plt.title(eachfile[0:-4].replace('dump_','') + ' adjectives')
    plt.axis("off")
    plt.savefig(eachfile[0:-4] + '_adjectives.png',dpi=600)
    plt.clf()