Jobs: Difference between revisions

From stacky wiki
No edit summary
No edit summary
 
Line 7: Line 7:
email = "YOUREMAILHERE"
email = "YOUREMAILHERE"
passwd= "YOURPASSWORDHERE"
passwd= "YOURPASSWORDHERE"
names = ["City University of New York", "Georgia Institute of Technology", "Indiana University", "Michigan State University", "Ohio State University, Columbus", "Pennsylvania State University", "Purdue University", "Rutgers", "Stony Brook", "University of California Berkeley", "University of California Los Angeles", "University of California San Diego", "University of California Santa Barbara", "University of Illinois", "University of Maryland", "University of Michigan", "University of Minnesota", "University of North Carolina", "University of Oregon", "University of Texas", "University of Utah", "University of Virginia", "University of Washington", "University of Wisconsin", "Boston University", "Brandeis University", "Brown University", "California Institute of Technology", "Carnegie Mellon University", "Columbia University", "Cornell University", "Duke University", "Harvard University", "Johns Hopkins University, Baltimore", "Massachusetts Institute of Technology", "Courant Institute", "Northwestern University", "Princeton University", "Rensselaer Polytechic Institute", "Rice University", "Stanford University", "University of Chicago", "University of Notre Dame", "University of Pennsylvania", "University of Southern California", "Washington University", "Yale University"]
names = ["City University of New York", "Georgia Institute of Technology", "Indiana University", "Michigan State University", "Ohio State University, Columbus",  
        "Pennsylvania State University", "Purdue University", "Rutgers", "Stony Brook", "University of California Berkeley", "University of California Los Angeles",  
        "University of California San Diego", "University of California Santa Barbara", "University of Illinois", "University of Maryland", "University of Michigan",  
        "University of Minnesota", "University of North Carolina", "University of Oregon", "University of Texas", "University of Utah", "University of Virginia",  
        "University of Washington", "University of Wisconsin", "Boston University", "Brandeis University", "Brown University", "California Institute of Technology",  
        "Carnegie Mellon University", "Columbia University", "Cornell University", "Duke University", "Harvard University", "Johns Hopkins University, Baltimore",  
        "Massachusetts Institute of Technology", "Courant Institute", "Northwestern University", "Princeton University", "Rensselaer Polytechic Institute",  
        "Rice University", "Stanford University", "University of Chicago", "University of Notre Dame", "University of Pennsylvania", "University of Southern California",  
        "Washington University", "Yale University"]


targetfile = "jobs.html"
targetfile = "jobs.html"

Latest revision as of 16:59, 25 November 2011

Here's a Python script that takes a list of university names and produces a page listing all jobs at those universities that are posted on mathjobs.

#!/usr/bin/python
#

# --- User configuration: edit these values before running the script ---

# mathjobs.org account credentials; they are filled into the site's login form.
email = "YOUREMAILHERE"
passwd = "YOURPASSWORDHERE"

# Employer names to look up. Each entry is submitted as one search query, so
# the spelling has to be something the mathjobs search can match.
names = [
    "City University of New York",
    "Georgia Institute of Technology",
    "Indiana University",
    "Michigan State University",
    "Ohio State University, Columbus",
    "Pennsylvania State University",
    "Purdue University",
    "Rutgers",
    "Stony Brook",
    "University of California Berkeley",
    "University of California Los Angeles",
    "University of California San Diego",
    "University of California Santa Barbara",
    "University of Illinois",
    "University of Maryland",
    "University of Michigan",
    "University of Minnesota",
    "University of North Carolina",
    "University of Oregon",
    "University of Texas",
    "University of Utah",
    "University of Virginia",
    "University of Washington",
    "University of Wisconsin",
    "Boston University",
    "Brandeis University",
    "Brown University",
    "California Institute of Technology",
    "Carnegie Mellon University",
    "Columbia University",
    "Cornell University",
    "Duke University",
    "Harvard University",
    "Johns Hopkins University, Baltimore",
    "Massachusetts Institute of Technology",
    "Courant Institute",
    "Northwestern University",
    "Princeton University",
    # Spelling corrected from "Polytechic", which could not match the
    # institution's real name in a search.
    "Rensselaer Polytechnic Institute",
    "Rice University",
    "Stanford University",
    "University of Chicago",
    "University of Notre Dame",
    "University of Pennsylvania",
    "University of Southern California",
    "Washington University",
    "Yale University",
]

# Path of the HTML file that receives the merged job listings.
targetfile = "jobs.html"

from mechanize import Browser
from lxml import etree
from StringIO import StringIO
import time

# Drive a scripted browser session against mathjobs.org: log in, run one
# employer-name search per entry in `names`, merge the result rows of every
# search into a single table, and save the merged page to `targetfile`.
br = Browser()

# need to log in first
br.open("https://www.mathjobs.org/jobs?info-ja")
br.select_form(nr=0)  # nr=0 selects the first form on the page
br["email"] = email
br["pass"] = passwd
br.submit()

# now search for jobs
results = etree.Element("table")
first = True
template = None  # most recent page with a usable result table; output shell
for n in names:
    br.open("https://www.mathjobs.org/jobs?jobsearch")
    br.select_form(name="mainForm")
    br["Name"] = n
    br.submit()
    page = etree.parse(StringIO(br.response().read()), etree.HTMLParser()).getroot()

    # The result rows are read from the second <table> directly under <body>.
    body = page.find("body") if page is not None else None
    tables = body.findall("table") if body is not None else []
    if len(tables) < 2:
        # Report and skip this name. Nothing is merged for it, so a failure on
        # the very first lookup can no longer push a placeholder into the tree.
        print("trouble with %s" % n)
    else:
        # Snapshot the rows before merging: lxml *moves* an element when it is
        # appended to another parent, so indexing the live table after moving
        # its header row would silently skip the first data row.
        rows = list(tables[1])
        if rows:
            if first:
                results.append(rows[0])  # include headers first time
                first = False
            results.extend(rows[1:])  # remove column headers
        template = page
    time.sleep(5)  # wait five seconds to be gentle on the mathjobs server

if template is None:
    # Either `names` was empty or every lookup failed; there is no page to
    # plug the results into, so stop with a clear message.
    raise SystemExit("no search results could be retrieved; %s not written" % targetfile)

# plug the concatenated results into the last successfully parsed page
# (fourth child of the page's second top-level element)
template[1][3] = results
with open(targetfile, "w") as f:  # closed even if the write fails
    f.write(etree.tostring(template))

Then fix the links with

sed -e 's|<a href="/jobs|<a href="https://www.mathjobs.org/jobs|g' jobs.html > jobs1.html

You'll need to configure your browser not to send the referrer header, since mathjobs doesn't like people linking directly to job postings. For Firefox, you can use the RefControl extension.

Then you can open jobs1.html in your browser, and click the links to go to the jobs. Note that the wizard tool won't work.