buku/markit
Arun Prakash Jana 1cc2c18bf1 Replace Unicode chars in page data before UTF-8 decode.
Signed-off-by: Arun Prakash Jana <engineerarun@gmail.com>
2015-11-10 23:25:34 +05:30

530 lines
17 KiB
Python
Executable File

#!/usr/bin/python3
#
# Bookmark management utility
#
# Copyright (C) 2015 Arun Prakash Jana <engineerarun@gmail.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with markit. If not, see <http://www.gnu.org/licenses/>.
import sys
import os
import sqlite3
from getopt import getopt, GetoptError
import readline
import webbrowser
import html.parser as HTMLParser
from http.client import HTTPConnection
from http.client import HTTPSConnection
from urllib.parse import urljoin, unquote
# Globals
# Option state; the defaults below are overwritten by the command-line
# parsing in the main section of this file.
addurl = False  # True when -a or -i is given: add a new bookmark
addindex = None  # DB index passed to -i, at which the new record is inserted
online = False  # True when -w is given: fetch the page title from the web
delete = False  # True when -d or -D is given
openurl = None  # DB index passed to -o, whose URL is opened in the browser
show = False  # True when -p or -P is given
showindex = None  # DB index passed to -p; None makes printdb() list every record
search = False  # True when -s is given
entry = None  # DB index passed to -d or -u; None with -D means drop all bookmarks
update = False  # True when -u is given
debug = False  # True when -z is given: print debug information
# Show usage of markit and exit
def usage():
    """Print the command-line help text to stdout and exit with status 1."""
    help_text = (
        "Usage: markit [OPTIONS] KEYWORDS...",
        "Bookmark manager. Your private Google.\n",
        "Options",
        " -a URL tag 1, tag 2, ... add URL as bookmark with comma separated tags",
        " -d N delete entry at DB index N (from -P output)",
        " -D delete ALL bookmarks",
        " -i N insert entry at DB index N, useful to fill deleted index",
        " -o N open URL at DB index N in browser",
        " -p N show details of bookmark record at DB index N",
        " -P show all bookmarks along with index from DB",
        " -s keyword(s) search all bookmarks for a (partial) tag or each keyword",
        " -u N update entry at DB index N",
        " -w fetch title info from web, works with -a, -i, -u",
        " -z show debug information",
        " you can either add or update or delete in one instance",
        " any other option shows help and exits markit\n",
        "Keys",
        " 1-N open Nth search result in browser. Enter exits markit.\n",
        "Version 1.0",
        "Copyright (C) 2015 Arun Prakash Jana <engineerarun@gmail.com>",
        "Webpage: https://github.com/jarun/markit",
    )
    # Entries ending in "\n" produce the blank separator lines.
    print("\n".join(help_text))
    sys.exit(1)
# Initialize the database connection
# Create the bookmarks table if it does not exist
def initdb():
    """Open the bookmark database, creating directory, file and table if needed.

    The database file is ``<home>/.cache/markit/bookmarks.db``, where
    ``<home>`` is ``$HOME`` or, if that is unset or empty, the result of
    ``os.path.expanduser('~')``.

    Returns:
        A ``(connection, cursor)`` tuple for the SQLite database.
    """
    # os.environ.get('HOME') is None when HOME is unset, which made
    # os.path.join() fail with a TypeError; fall back to expanduser().
    home = os.environ.get('HOME') or os.path.expanduser('~')
    dbpath = os.path.join(home, '.cache', 'markit')

    # exist_ok replaces the separate exists() check, which could race with
    # another process creating the directory in between.
    os.makedirs(dbpath, exist_ok=True)

    # Create a connection
    conn = sqlite3.connect(os.path.join(dbpath, 'bookmarks.db'))
    cur = conn.cursor()

    # Create table if it doesn't exist
    cur.execute('CREATE TABLE if not exists bookmarks '
                '(id integer PRIMARY KEY, URL text NOT NULL UNIQUE, metadata text, tags text)')
    conn.commit()
    return (conn, cur)
# Add a new bookmark or update an existing record at index
def _join_tags(tokens):
    """Collapse command-line tokens into the stored ``,tag 1,tag 2,`` form.

    Tokens are appended with a separating space unless the text built so far
    already ends in a comma; the result always starts and ends with a comma.
    """
    tags = ','
    for token in tokens:
        if tags[-1] == ',':
            tags += token
        else:
            tags += ' ' + token
    if tags[-1] != ',':
        tags += ','
    return tags


def _split_http_url(url):
    """Split an http(s) URL into ``(secure, server, request_path)``.

    Returns None when the URL does not start with an http/https scheme or
    has no host part.
    """
    from urllib.parse import urlsplit  # not among the file-level imports

    parts = urlsplit(url)
    if parts.scheme not in ("http", "https") or not parts.netloc:
        return None
    path = parts.path or "/"
    if parts.query:
        path += "?" + parts.query
    return (parts.scheme == "https", parts.netloc, path)


def _connect(secure, server):
    """Create an HTTPS (secure) or HTTP connection object with a 30 s timeout."""
    if secure:
        return HTTPSConnection(server, timeout=30)
    return HTTPConnection(server, timeout=30)


def _read_title(resp):
    """Decode the body of an HTTP response and return its <title> text, or ''."""
    charset = resp.headers.get_content_charset()
    if charset is None:
        charset = 'utf-8'
    if debug:
        print(charset)

    parser = BMHTMLParser()
    # "replace" keeps a few undecodable bytes from discarding the whole page
    parser.feed(resp.read().decode(charset, "replace"))
    if parser.data is not None and parser.data.find("Error") < 0:
        return parser.data
    return ''


def _fetch_title(url):
    """Fetch url, following at most one redirect, and return the page title.

    Every failure is reported on stdout and results in an empty string.
    """
    urlconn = None  # so the finally clause is safe if setup itself fails
    try:
        target = _split_http_url(url)
        if target is None:
            return ''
        secure, server, path = target
        if debug:
            print("server: [%s]" % server)
        urlconn = _connect(secure, server)
        if debug:
            print("URL: [%s]" % path)
        urlconn.request("GET", path)
        resp = urlconn.getresponse()

        # Handle first redirection
        if resp.status in (301, 302, 303, 307, 308):
            location = resp.getheader('location', '')
            if debug:
                print(location)

            redirurl = urljoin(url, location)
            if redirurl.find("sorry/IndexRedirect?") >= 0:
                print("ERROR: Connection blocked due to unusual activity.")
                return ''

            # The scheme and host must come from the redirect target itself:
            # an http -> https redirect needs an HTTPS connection.
            target = _split_http_url(redirurl)
            if target is None:
                print("ERROR: Cannot follow redirect to [%s]" % redirurl)
                return ''
            secure, server, path = target

            urlconn.close()
            urlconn = _connect(secure, server)
            if debug:
                print("Redir server: [%s]" % server)
                print("Redir URL: [%s]" % redirurl)

            urlconn.request("GET", path)
            resp = urlconn.getresponse()
            if resp.status != 200:
                print("ERROR on retry:", str(resp.status), ": ", resp.reason)
                return ''
        elif resp.status != 200:
            print("ERROR:", str(resp.status), ": ", resp.reason)
            return ''

        return _read_title(resp)
    except Exception as e:
        print("Exception: %s" % e)
        return ''
    finally:
        if urlconn is not None:
            urlconn.close()


def AddUpdateEntry(conn, cur, keywords, index):
    """Add a new bookmark, or update the record at ``index``.

    Args:
        conn: sqlite3 connection, used to commit the change.
        cur: sqlite3 cursor on the bookmarks table.
        keywords: non-empty list; the first item is the URL and the remaining
            items are tag tokens (see ``_join_tags``).
        index: None to insert a new record, otherwise the id of the record to
            update (anything ``int()`` accepts).

    Reads the module-level flags ``online`` (fetch the title from the web),
    ``debug`` and ``addindex`` (explicit id for an inserted record). As
    before, ``online`` is switched off globally when the URL is not an
    http(s) URL. The outcome is reported on stdout; nothing is returned.
    """
    global online

    url = keywords[0]
    tags = _join_tags(keywords[1:])
    meta = ''

    if online == True:
        if _split_http_url(url) is None:
            # Not something we can fetch over HTTP(S)
            online = False
        else:
            meta = _fetch_title(url).strip().replace("\n", "")
            print("Title: [%s]" % meta)

    if index is None:  # Insert a new entry
        try:
            if addindex is None:  # addindex is index number to insert record at
                cur.execute('INSERT INTO bookmarks(URL, metadata, tags) VALUES (?, ?, ?)',
                            (url, meta, tags,))
            else:
                cur.execute('INSERT INTO bookmarks(id, URL, metadata, tags) VALUES (?, ?, ?, ?)',
                            (int(addindex), url, meta, tags,))
            conn.commit()
            print("Added at index %d" % cur.lastrowid)
        except sqlite3.IntegrityError:
            # Exact match, not LIKE: '_' and '%' in a URL are wildcards for
            # LIKE and could report the index of a different bookmark.
            for row in cur.execute("SELECT id FROM bookmarks WHERE URL = ?", (url,)):
                print("URL already exists at index %s" % row[0])
                return

            # The URL is not the duplicate, so the requested id must be taken
            print("Index %s exists" % addindex)
    else:  # Update an existing entry
        try:
            cur.execute("UPDATE bookmarks SET URL = ?, metadata = ?, tags = ? WHERE id = ?",
                        (url, meta, tags, int(index),))
            conn.commit()
            if cur.rowcount == 1:
                print("Updated")
            else:
                print("No matching index")
        except sqlite3.IntegrityError:
            print("URL already exists")
# Search the database for a tag or matching URL or Title info
def searchdb(cur, keywords):
    """Search bookmarks, list the matches, then open results on request.

    A record matches when its tags contain all keywords joined by single
    spaces, or when its URL or title contains any individual keyword.
    Matches are printed with a running number; the user is then prompted
    repeatedly for a number to open in the browser. Any non-integer input
    (including plain Enter) or end-of-input ends the prompt.

    Args:
        cur: sqlite3 cursor on the bookmarks table.
        keywords: list of search tokens.
    """
    placeholder = "'%' || ? || '%'"
    query = "SELECT url, metadata, tags FROM bookmarks WHERE tags LIKE (%s)" % placeholder
    arguments = [" ".join(keywords)]
    for token in keywords:
        query += " OR URL LIKE (%s) OR metadata LIKE (%s)" % (placeholder, placeholder)
        arguments += [token, token]

    if debug:
        print("%s, (%s)" % (query, arguments))

    results = []
    for count, row in enumerate(cur.execute(query, arguments), 1):
        results.append(row[0])
        # row[2] is stored as ",tag,tag,": strip the outer commas for display
        print("\x1B[1m\x1B[93m%d. \x1B[0m\x1B[92m%s\x1B[0m\n\t%s\n\t\x1B[91m[TAGS]\x1B[0m %s"
              % (count, row[0], row[1], row[2][1:-1]))

    if not results:
        return

    print("")

    while True:
        try:
            nav = input("Index number to open: ")
        except EOFError:
            # Ctrl-D / exhausted stdin: leave quietly instead of a traceback
            print("")
            break

        if not is_int(nav):
            break

        index = int(nav) - 1
        if index < 0:
            # Negative values would silently index from the end of the list
            print("Index out of bound")
            continue

        try:
            target = results[index]
        except IndexError:
            print("Index out of bound")
            continue

        browser_open(unquote(target))
# Delete a single record or remove the table
def cleardb(conn, cur, index):
    """Delete one bookmark, or drop the whole bookmarks table.

    Args:
        conn: sqlite3 connection, used to commit the change.
        cur: sqlite3 cursor on the bookmarks table.
        index: None to drop the table; otherwise the id of the record to
            delete (anything ``int()`` accepts).
    """
    if index is None:
        # Remove the table
        cur.execute('DROP TABLE if exists bookmarks')
        conn.commit()
        return

    # Remove a single entry
    try:
        cur.execute("DELETE FROM bookmarks WHERE id = ?", (int(index),))
        conn.commit()
        removed = cur.rowcount == 1
        print("Removed" if removed else "No matching index")
    except IndexError:
        print("Index out of bound")
# Print all records in the table
def printdb(cur, index):
    """Print every bookmark, or only the one stored at ``index``.

    Each record is shown as its id and URL, then the title, then the tags
    with the outer commas of the stored form removed.

    Args:
        cur: sqlite3 cursor on the bookmarks table.
        index: None to list all records; otherwise the id of the record to
            show (anything ``int()`` accepts).
    """
    layout = "\x1B[1m\x1B[93m%s. \x1B[0m\x1B[92m%s\x1B[0m\n\t%s\n\t\x1B[91m[TAGS]\x1B[0m %s"

    if index is None:
        # Show all entries
        for rec in cur.execute('SELECT * FROM bookmarks'):
            print(layout % (rec[0], rec[1], rec[2], rec[3][1:-1]))
        return

    # Show record at index
    try:
        for rec in cur.execute("SELECT * FROM bookmarks WHERE id = ?", (int(index),)):
            print(layout % (rec[0], rec[1], rec[2], rec[3][1:-1]))
            break
        else:
            # The query produced no row at all
            print("No matching index")
    except IndexError:
        print("Index out of bound")
# Fetch index and open URL in browser
def fetchopen(index):
    """Open the URL of the bookmark stored at ``index`` in the browser.

    Note that the query runs on the module-level cursor ``cur``; unlike the
    other database helpers this function does not take the cursor as a
    parameter.

    Args:
        index: id of the record to open (anything ``int()`` accepts).
    """
    try:
        lookup = cur.execute("SELECT URL FROM bookmarks WHERE id = ?", (int(index),))
        found = lookup.fetchone()
        if found is None:
            print("No matching index")
            return
        # URLs are stored percent-encoded; decode before handing over
        browser_open(unquote(found[0]))
    except IndexError:
        print("Index out of bound")
# Check if a value can be converted to an integer
def is_int(string):
    """Return True if ``int(string)`` succeeds, False otherwise.

    Only the conversion errors raised by ``int()`` are treated as "not an
    integer"; a bare ``except`` would also swallow KeyboardInterrupt and
    SystemExit.
    """
    try:
        int(string)
    except (TypeError, ValueError, OverflowError):
        return False
    return True
# Parse HTML page for Title info
class BMHTMLParser(HTMLParser.HTMLParser):
    """HTML parser that captures the text of the document's first <title>.

    After ``feed()``, the collected text is available in ``data`` (an empty
    string if no title was found). Later <title> elements, such as those
    inside inline SVG, are ignored; previously their text was appended to
    the page title.
    """

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.inTitle = False  # True while inside the first <title> element
        self.data = ""  # title text collected so far
        self._title_done = False  # set once the first title has ended

    def handle_starttag(self, tag, attrs):
        if self.inTitle:
            # Another tag opened before </title>: the title is over
            self._title_done = True
        self.inTitle = tag == "title" and not self._title_done

    def handle_endtag(self, tag):
        if tag == "title":
            if self.inTitle:
                self._title_done = True
            self.inTitle = False

    def handle_data(self, data):
        if self.inTitle:
            self.data += data
# Open a URL in browser
def browser_open(url):
    """Open ``url`` in the web browser while silencing the browser's output.

    File descriptors 1 and 2 are pointed at the null device for the duration
    of the ``webbrowser.open()`` call, so whatever the launched browser
    writes to stdout/stderr is discarded; both are restored afterwards.

    Args:
        url: URL to open; any ``%22`` in it is turned back into a double quote.
    """
    url = url.replace("%22", "\"")

    # Write out anything Python has buffered before the descriptors change
    sys.stdout.flush()
    sys.stderr.flush()

    # Duplicate BOTH descriptors before touching either. Closing fd 2 first
    # made the following os.dup(1) return 2, which was then overwritten, so
    # stdout ended up pointing at stderr's target after the call.
    saved_stdout = os.dup(1)
    saved_stderr = os.dup(2)
    error = None
    try:
        devnull = os.open(os.devnull, os.O_RDWR)
        try:
            os.dup2(devnull, 1)
            os.dup2(devnull, 2)
        finally:
            os.close(devnull)

        try:
            webbrowser.open(url)
        except Exception as e:
            # Report it after stdout is back; printing here would go to the
            # null device.
            error = e
    finally:
        os.dup2(saved_stdout, 1)
        os.dup2(saved_stderr, 2)
        # The saved copies are no longer needed; not closing them leaked two
        # descriptors per call.
        os.close(saved_stdout)
        os.close(saved_stderr)

    if error is not None:
        print("Browser Exception: %s" % error)
# Main starts here
# ----------------
optlist = None
keywords = None


def _exclusive(conflict):
    """Print the one-action-only notice plus the help text (and exit) if conflict is true."""
    if conflict:
        print("You can either add or update or delete in one instance\n")
        usage()


def _db_index(value):
    """Return value unchanged if it is a positive decimal number; otherwise exit via usage()."""
    if not value.isdigit() or int(value) <= 0:
        usage()
    return value


def _abort(connection):
    """Close the database connection, then print the help text and exit."""
    connection.close()
    usage()


if len(sys.argv) < 2:
    usage()

# Check cmdline options. Options are handled in the order given, so the
# add/update/delete exclusivity checks depend on what was seen earlier.
try:
    optlist, keywords = getopt(sys.argv[1:], "d:i:o:p:u:aDPswz")
    if not optlist:
        usage()

    for flag, value in optlist:
        if flag == "-a":
            _exclusive(update or delete)
            addurl = True
        elif flag == "-d":
            _exclusive(addurl or update)
            entry = _db_index(value)
            delete = True
        elif flag == "-D":
            _exclusive(addurl or update)
            delete = True
        elif flag == "-i":
            _exclusive(update or delete)
            addindex = _db_index(value)
            addurl = True
        elif flag == "-o":
            openurl = _db_index(value)
        elif flag == "-p":
            showindex = _db_index(value)
            show = True
        elif flag == "-P":
            show = True
        elif flag == "-s":
            search = True
        elif flag == "-u":
            _exclusive(addurl or delete)
            entry = _db_index(value)
            update = True
        elif flag == "-w":
            online = True
        elif flag == "-z":
            debug = True
except GetoptError as e:
    print("markit:", e)
    sys.exit(1)

# Initialize the database and get handles
conn, cur = initdb()

# An insert index (-i) is only meaningful when adding. -i itself switches
# addurl on above, so this guard only fires if that ever changes.
if addindex is not None and not addurl:
    _abort(conn)

# Call add or update record
if addurl or update:
    if not keywords:
        _abort(conn)
    AddUpdateEntry(conn, cur, keywords, entry)

# Search tags, URLs, Title info
if search:
    if not keywords:
        _abort(conn)
    searchdb(cur, keywords)

# Print all records, or the one selected with -p
if show:
    printdb(cur, showindex)

# Open URL in browser
if openurl is not None:
    fetchopen(openurl)

# Remove a single record or all records
if delete:
    cleardb(conn, cur, entry)

# Close the connection before exiting
conn.close()