Last active
December 15, 2015 22:19
-
-
Save phillipkent/5331804 to your computer and use it in GitHub Desktop.
N-plus-transform: An implementation in Python of the Oulipo 'S+7' ('noun + 7') text transformation.
[http://www.oulipo.net/contraintes/docs/s-7 , http://www.spoonbill.org/n+7/ ]
Requires the NLTK toolkit http://nltk.org, and uses a nouns list sourced from http://www.ashley-bovan.co.uk/words/partsofspeech.html
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Created on 14 Mar 2013
@author: phillip
INCOMPLETE CODE UNDER DEVELOPMENT
'''
# Oulipo's 'N plus' transformation for text
#
# Words list 'nouns91k.txt' contains 91000 nouns, found at
# http://www.ashley-bovan.co.uk/words/partsofspeech.html
# Words have been reordered strictly alphabetically; the original list had
# capitalised proper nouns first in order
#import nltk
import re

from nltk import pos_tag, word_tokenize
wordfile = "nouns91k.txt"
# Load the whole noun list into memory once: one entry per line, with the
# line terminators kept. At roughly 91000 short lines this is cheap, and the
# `with` block guarantees the handle is closed even if reading fails.
with open(wordfile, 'r') as wfile:
    nounslist = wfile.readlines()
def main(sourcefile="textgibson.txt", nmin=7, nmax=13):
    """Print the 'N plus' transformation of a text for a range of shifts.

    Plan of code:
        read the source text and tag its words with NLTK
        keep the words tagged as nouns
        for each shift n from nmin to nmax (inclusive):
            look up 'noun + n' for every noun (a noun that is not in the
            word list is replaced by itself)
            replace every whole-word occurrence of each noun in the text
            print the shifted text

    Args:
        sourcefile: path of the text file to transform.
        nmin: first shift to apply.
        nmax: last shift to apply (inclusive).
    """
    print("*** Oulipo's 'N plus' transformation ***\n")

    with open(sourcefile, 'r') as sfile:
        text = sfile.read()

    # see for better tagging: https://nltk.googlecode.com/svn/trunk/doc/howto/tag.html
    tagged = pos_tag(word_tokenize(text))  # tagging of words - by nltk library
    # A set drops repeated (word, tag) pairs so each is looked up once per shift.
    nouns = {wordandpos for wordandpos in tagged if nounselect(wordandpos)}
    # TO DO: need to remove all punctuation from words in nouns

    # No retry loop or TypeError/ValueError handlers here: the inputs are
    # fixed for the whole call, so retrying after an error could never
    # succeed and would only repeat the same failure forever.
    for n in range(nmin, nmax + 1):
        print('SOURCE:', sourcefile, " SHIFT: ", n)
        # dict() also collapses a word that was tagged in more than one way.
        replacements = dict(nounreplace(wordandpos, n) for wordandpos in nouns)
        print(_substitute_words(text, replacements))


def _substitute_words(text, replacements):
    """Replace whole-word occurrences of each key of `replacements` in one pass.

    Substituting in a single pass means a word that has just been inserted is
    never replaced again by a later rule, so the result does not depend on
    iteration order. Keys are regex-escaped, and replacement values are
    inserted literally.
    """
    # Skip empty keys (they would match at every word boundary) and no-ops.
    changed = {word: new for word, new in replacements.items()
               if word and word != new}
    if not changed:
        return text
    # Longest alternatives first so a short word cannot shadow a longer one.
    alternatives = '|'.join(
        re.escape(word) for word in sorted(changed, key=len, reverse=True))
    pattern = re.compile(r'\b(?:' + alternatives + r')\b')
    return pattern.sub(lambda match: changed[match.group(0)], text)
def nounselect(wordandpos):
    """Tell whether a tagged word is a noun.

    Args:
        wordandpos: a (word, tag) pair; only the tag at index 1 is read.

    Returns:
        True when the tag is one of the noun tags NN, NNS, NNP or NNPS
        (singular/plural common and proper nouns), otherwise False.
    """
    return wordandpos[1] in ('NN', 'NNS', 'NNP', 'NNPS')
def nounreplace(wordandpos, nshift):
    """Pair a tagged word with its shifted replacement.

    Args:
        wordandpos: a sequence whose first item is the word to shift.
        nshift: shift passed through to findreplacement.

    Returns:
        A (word, replacement) tuple.
    """
    noun = wordandpos[0]
    shifted = findreplacement(noun, nshift)
    return (noun, shifted)
def findreplacement(noun, nshift, nouns=None):
    """Return the noun `nshift` entries after `noun` in the word list.

    Args:
        noun: the word to look up (exact, case-sensitive match).
        nshift: number of entries to move forward; the position wraps
            around at either end of the list.
        nouns: optional list of word-list entries to search, each
            optionally ending in a line terminator. Defaults to the
            module-level `nounslist`.

    Returns:
        The shifted noun with surrounding whitespace stripped, or `noun`
        itself when it does not occur in the list.
    """
    if nouns is None:
        nouns = nounslist
    # Entries keep whatever terminator the file was read with, so try each
    # form in turn; '' covers a final line with no terminator.
    for terminator in ('\n', '\r\n', '\r', ''):
        try:
            ipos = nouns.index(noun + terminator)
        except ValueError:
            continue
        # Modulo keeps the shifted index inside the list instead of raising
        # IndexError for nouns near the end.
        return nouns[(ipos + nshift) % len(nouns)].strip()
    # if noun not found, the shifted noun is same as noun
    return noun
# Run the transformation only when executed as a script, not on import.
if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
The sky above the port was the color of television, tuned to a dead channel. | |
"It's not like I'm using," Case heard someone say, as he shouldered his way through the crowd around the door of the Chat. "It's like my body's developed this massive drug deficiency." It was a Sprawl voice and a Sprawl joke. The Chatsubo was a bar for professional expatriates; you could drink there for a week and never hear two words in Japanese. | |
Ratz was tending bar, his prosthetic arm jerking monotonously as he filled a tray of glasses with draft Kirin. He saw Case and smiled, his teeth a webwork of East European steel and brown decay. Case found a place at the bar, between the unlikely tan on one of Lonny Zone's whores and the crisp naval uniform of a tall African whose cheekbones were ridged with precise rows of tribal scars. "Wage was in here early, with two joeboys," Ratz said, shoving a draft across the bar with his good hand. "Maybe some business with you, Case?" | |
Case shrugged. The girl to his right giggled and nudged him. | |
The bartender's smile widened. His ugliness was the stuff of legend. In an age of affordable beauty, there was something heraldic about his lack of it. The antique arm whined as he reached for another mug. It was a Russian military prosthesis, a seven-function force-feedback manipulator, cased in grubby pink plastic. "You are too much the artiste, Herr Case." Ratz grunted; the sound served him as laughter. He scratched his overhang of white-shirted belly with the pink claw. "You are the artiste of the slightly funny deal." |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment