# -*- coding: utf-8 -*-
"""Wordze.com API python bindings."""
# "configure" is exported together with the shortcut functions: they raise
# ApiError("ApiKey not set") until configure() has been called, so a
# star-import without it leaves the shortcuts unusable.
__all__ = ("Api", "ApiError", "HistoryKeyword", "Keyword",
           "configure", "history", "search", "status", "single")
import urllib
import urllib2
import urlparse
from functools import wraps, partial
from datetime import datetime
from xml.dom.minidom import Element, parse
# Search filters: accepted values for the ``filtertype`` argument of
# ``Api.search`` (numbered consecutively from 1 to 6).
(F_NONE, F_ADULT, F_DRUGS,
 F_GAMBLING, F_WAREZ_HACKING, F_ALL) = range(1, 7)

# Search styles: accepted values for the ``searchstyle`` argument of
# ``Api.search``.
S_EXACT, S_BROAD, S_ANY = "exact", "broad", "any"
def extract_text(dom, name, wrapper=None):
    """
    Extract the text of the first tag called `name` found under `dom`.

    The text is read from the last child node of that tag.  An empty
    string is used when no such tag exists or when the tag has no
    child nodes at all (e.g. ``<Page/>``).

    If `wrapper` is given, the text (including the empty-string
    fallback) is passed through it and the result is returned; whatever
    the wrapper raises for unsuitable text propagates to the caller.
    """
    text = ""
    elements = dom.getElementsByTagName(name)
    if elements:
        node = elements[0].lastChild
        # An empty element has no children, so lastChild is None;
        # treat that as empty text instead of failing on ``None.data``.
        if node is not None:
            text = node.data
    return wrapper(text) if wrapper else text
class ApiError(Exception):
    """Error raised by these bindings (bad arguments, API-reported errors)."""
    pass
class Keyword(dict):
    """
    Class wrapper for a keyword item.

    A dict holding the keys "term", "count" and "estimated".  Keywords
    are ordered by their "count" value.
    """

    def __init__(self, data):
        """
        Constructor.

        `data` is either a minidom Element, whose ``Count`` and
        ``Estimated`` attributes are converted to int and whose first
        child node supplies the term, or any other value, which is
        stored as the term itself with both counters set to None.
        """
        if isinstance(data, Element):
            self["count"] = int(data.getAttribute("Count"))
            self["estimated"] = int(data.getAttribute("Estimated"))
            # FIXME: ugly
            # The term may already have been stored (HistoryKeyword
            # does so before delegating here); only fall back to the
            # element text when it is missing.
            if "term" not in self:
                self["term"] = data.childNodes[0].data
        else:
            # No counters are known for a bare term.
            self["count"] = self["estimated"] = None
            self["term"] = data

    def __repr__(self):
        return "\"%s\"" % self["term"].encode("utf-8")

    def __cmp__(self, other):
        """Three-way comparison by "count"; TypeError for non-Keywords."""
        if not isinstance(other, Keyword):
            raise TypeError
        if self["count"] < other["count"]:
            return -1
        elif self["count"] == other["count"]:
            return 0
        else:
            return 1

    def __lt__(self, other):
        """
        Rich-comparison counterpart of __cmp__, so that sorting works
        the same way on interpreters that do not consult __cmp__.
        """
        return self.__cmp__(other) < 0
class HistoryKeyword(Keyword):
    """
    Keyword entry produced by a history lookup.

    Adds a "date" key (a datetime parsed from the node's ``Date``
    attribute, format YYYY-MM-DD) to the keys set by Keyword.
    """

    def __init__(self, term, history):
        """
        Store `term` and the parsed date, then let Keyword read the
        remaining values from the `history` node.
        """
        self["term"] = term
        raw_date = history.getAttribute("Date")
        self["date"] = datetime.strptime(raw_date, "%Y-%m-%d")
        parent = super(HistoryKeyword, self)
        parent.__init__(history)

    def __repr__(self):
        quoted_term = super(HistoryKeyword, self).__repr__()
        day = self["date"].date()
        return "%s on %s" % (quoted_term, day)
class Api(object):
    """
    Api worker class.

    Every public method issues one HTTP request against ``self.apiurl``,
    parses the XML reply and raises ApiError when the reply contains
    ``<Error>`` elements.
    """

    def __init__(self, apikey):
        """Constructor; `apikey` is sent as ``ApiKey`` with every request."""
        self.apikey = apikey
        self.apiurl = "http://api.wordze.com"

    def history(self, query, date):
        """
        Method performs a lookup of the history for a given
        keyword and returns a list of HistoryKeyword items, one per
        ``<data>`` element of the reply.

        Note: the date should be either datetime.datetime
        instance or a string of format YYYYMM.

        Raises ApiError for any other date value and for errors
        reported in the reply.
        """
        if isinstance(date, datetime):
            date = date.strftime("%Y%m")
        elif isinstance(date, basestring):
            try:
                # Validating date format
                datetime.strptime(date, "%Y%m")
            except ValueError:
                raise ApiError("Invalid date format")
        else:
            raise ApiError("Invalid date format")

        dom = self._fetch("ApiHistory", {"ApiKey": self.apikey,
                                         "Query": query,
                                         "Date": date})
        # The query is the same for every item; only the <data> node
        # differs from one HistoryKeyword to the next.
        return [HistoryKeyword(query, node)
                for node in dom.getElementsByTagName("data")]

    def status(self):
        """
        Method checks Wordze.com account status (number of API
        queries used for a day).

        Returns a dict with the text of the ``Search``, ``Wordrank``
        and ``Dig`` elements of the reply.

        Note: You should ONLY run this at the start of your
        application, and keep track until it completes.
        """
        dom = self._fetch("AccountStatus", {"ApiKey": self.apikey})
        return {"Search": extract_text(dom, "Search"),
                "Wordrank": extract_text(dom, "Wordrank"),
                "Dig": extract_text(dom, "Dig")}

    def single(self, *queries):
        """
        Method performs a single keyword search for a given list
        of keywords and returns the resulting Keyword items, sorted.

        Raises ApiError when more than 500 queries are given.
        """
        if len(queries) > 500:
            raise ApiError("Single keyword search is limited to "
                           "500 queries at a time")

        dom = self._fetch("KeywordSingle", {"ApiKey": self.apikey,
                                            "Query": ",".join(queries)})
        return sorted(map(Keyword, dom.getElementsByTagName("Keyword")))

    def search(self,
               query, pagenum=1, filtertype=F_NONE, numresult=20,
               searchstyle=S_BROAD, charlen=None, countlim=None):
        """
        Method performs a search using Wordze.com API.

        Available parameters:
        * query - keyword to search for
        * pagenum - the page number in results to show
        * filtertype - what to filter out, should be one of the F_* constants
        * numresult - number of results per page
        * searchstyle - should be one of the S_* constants
        * charlen - keyword length limit, explanation:
          charlen=-15 will only produce results with 15 or less
          characters in the keyword
          charlen=25 will only produce results with 25 or more
          characters in the keyword.
          Note that, length is calculated __including__ spaces.
        * countlim - keyword hit count limit, explanation:
          countlim=-15 will only produce results with 15 or less hits
          countlim=100 will only produce results with 100 or more hits

        Returns a dict with the keys "page", "total", "searchstyle",
        "filters", "numresult" and "keywords" (a sorted list of
        Keyword items).

        TODO: write this as a generator yielding pages one by one,
              until there's nothing available
        """
        params = {"ApiKey": self.apikey,
                  "Query": query,
                  "PageNum": pagenum,
                  "FilterType": filtertype,
                  "NumResult": numresult,
                  "SearchStyle": searchstyle}
        # Unset limits are left out of the request altogether; encoding
        # them would send the literal string "None".
        optional = {"CharLen": charlen, "CountLim": countlim}
        params.update((key, value) for key, value in optional.items()
                      if value is not None)

        dom = self._fetch("ApiSearch", params)
        return {
            "page": extract_text(dom, "Page", int),
            "total": extract_text(dom, "TotalPages", int),
            "searchstyle": extract_text(dom, "SearchStyle"),
            "filters": extract_text(dom, "Filters", int),
            "numresult": extract_text(dom, "ResultsPerPage", int),
            "keywords": sorted(map(Keyword,
                                   dom.getElementsByTagName("Keyword")))}

    def _fetch(self, method, params):
        """
        Issue a request, parse the reply and validate it.

        Returns the parsed DOM; the HTTP response is closed as soon as
        it has been parsed.  Raises ApiError as described in _request
        and _validate.
        """
        response = self._request(method, params)
        try:
            dom = parse(response)
        finally:
            response.close()
        self._validate(dom)
        return dom

    def _request(self, method, params, count=None):
        """
        Open ``<apiurl>/<method>?<urlencoded params>`` and return the
        response object.

        `count` is the maximum number of attempts.  None (the default)
        or a non-positive value means the request is retried until it
        succeeds.  Each failed attempt is reported on stdout; ApiError
        is raised once a positive `count` is used up.
        """
        url = urlparse.urljoin(
            self.apiurl, "%s?%s" % (method, urllib.urlencode(params)))

        # XXX: just in case anyone supplies a negative
        # max count value :)
        if count is None or count <= 0:
            count = None

        failure = None
        while count != 0:
            if count:
                count -= 1
            try:
                return urllib2.urlopen(url)
            except urllib2.URLError as exc:
                failure = exc
                print("%s...retrying" % exc)

        # All attempts failed: report it instead of handing None to
        # the XML parser.
        raise ApiError("Request failed: %s" % failure)

    def _validate(self, dom):
        """
        Method validates API response, wrapped in minidom constructor.
        If there are errors present, ApiError with appropriate error
        message is raised.  Returns True otherwise.
        """
        errors = dom.getElementsByTagName("Error")
        if errors:
            # An <Error/> element without text contributes an empty
            # message rather than breaking the error report itself.
            raise ApiError(", ".join(
                error.lastChild.data if error.lastChild is not None else ""
                for error in errors))
        return True
# Shortcut functions
# Worker shared by the module-level shortcuts; None until configure() runs.
apiworker = None


def configure(apikey):
    """Create an Api for `apikey` and install it as the module-wide worker."""
    global apiworker
    worker = Api(apikey)
    apiworker = worker
def proxy(obj, attr):
    """
    Build a function that forwards its arguments to method `attr` of
    the current module-wide ``apiworker``.

    `obj` is accepted but not used: the worker is looked up at call
    time.  The returned function carries the metadata of
    ``Api.<attr>`` and raises ApiError while no worker is set.
    """
    target = getattr(Api, attr)

    def wrapper(*args, **kw):
        if not apiworker:
            raise ApiError("ApiKey not set")
        method = getattr(apiworker, attr)
        return method(*args, **kw)

    return wraps(target)(wrapper)
# Module-level shortcuts, one per public Api method of the same name.
# A generator expression is used so no loop variable is left behind in
# the module namespace.
search, status, single, history = (
    proxy(apiworker, method_name)
    for method_name in ("search", "status", "single", "history"))