Example of web scraping with Python and PyMongo
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# codec aliases: utf_8, U8, UTF, utf8
"""read_from_json.py:
Example of web scraping
with Python and PyMongo.
"""
__author__ = "GlukAlex"
import pymongo
import sys
import json
import requests
#import urllib.request
# AttributeError: 'module' object has no attribute 'request'
#import urllib
# ImportError: No module named 'urllib2'
#import urllib2
"""Note | |
The 'urllib2' module | |
has been split across several modules | |
in Python 3 named | |
'urllib.request' and | |
'urllib.error'. | |
The '2to3 tool' will | |
automatically adapt `imports` | |
when converting your sources to Python 3. | |
""" | |
"""Note | |
The 'urllib' module | |
has been split into parts and | |
renamed in Python 3 to | |
'urllib.request', | |
'urllib.parse', and | |
'urllib.error'. | |
The '2to3 tool' will | |
automatically adapt imports | |
when converting your sources to Python 3. | |
Also note that | |
the 'urllib.request.urlopen()' function | |
in Python 3 is equivalent to | |
'urllib2.urlopen()' and that | |
'urllib.urlopen()' has been removed. | |
""" | |
def get_N_Insert_Page_Content(url: str = ""):
    # connect to the database
    client = pymongo.MongoClient(
        'mongodb://localhost'
        #'localhost',
        # default port from MongoDB config files
        # for the server to listen on
        #27017
    )
    # attach to the 'reddit' database
    db = client.reddit
    # handle to the 'stories' collection
    stories = db.stories
    # clear / entirely delete the existing collection
    stories.drop()
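    # An alternative sketch: 'drop()' discards the collection along with
    # its indexes; to keep both and only remove the documents, PyMongo's
    # 'delete_many' with an empty filter would do:
    #stories.delete_many({})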
    # JSON Response Content
    # There’s also
    # a builtin JSON decoder,
    # in case you’re dealing with JSON data:
    #>>> import requests
    #>>> r = requests.get('https://api.github.com/events')
    #>>> r.json()
    # In case
    # the JSON `decoding` fails,
    # 'r.json()' raises an exception.
    # For example,
    # if the `response` gets a '204' (No Content), or
    # if the `response` contains `invalid` JSON,
    # attempting 'r.json()' raises
    # ValueError: No JSON object could be decoded.
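    # A hedged sketch of guarding that decode step, matching the note
    # above (in current 'requests' the error is
    # 'requests.exceptions.JSONDecodeError', a 'ValueError' subclass,
    # so catching 'ValueError' covers old and new versions alike):
    #try:
    #    data = r.json()
    #except ValueError:
    #    data = None  # body was empty or not valid JSON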
    # get the specified web page,
    # falling back to a local sample copy if no 'url' was passed in
    reddit_page = (
        url or
        #"https://www.reddit.com/r/technology/.json"
        "http://localhost:8888/files/PyMongo/reddit_com_technology.json"
    )
    #urllib.request.urlopen(
    #    url,
    #    data=None,
    #    [timeout, ]*,
    #    cafile=None,
    #    capath=None,
    #    cadefault=False,
    #    context=None)
    # Open the URL url,
    # which can be
    # either a string or
    # a Request object.
    # For 'http' and 'https' `urls`,
    # 'urlopen' returns
    # an 'http.client.HTTPResponse' object
    # which has
    # the 'HTTPResponse Objects' methods.
    page_Content = (
        requests.get(reddit_page)
        #urllib.request.urlopen(reddit_page)
        #urllib2.urlopen(reddit_page)
    )
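    # A hedged addition: 'requests' responses expose 'raise_for_status()',
    # which raises 'requests.HTTPError' for 4xx/5xx replies; enabling it
    # here would fail fast on a bad fetch:
    #page_Content.raise_for_status()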
    # AttributeError: 'bytes' object has no attribute 'read'
    # DEBUG
    print("""type(page_Content) is: {}""".format(type(page_Content)))
    page_JSON = page_Content.json()
    # DEBUG
    print("""type(page_JSON) is: {}""".format(type(page_JSON)))
    if isinstance(page_JSON, (str, dict)):
        #print("""page_JSON is: {0['data']:50}""".format(page_JSON))
        # truncate the preview to 50 characters
        print("""page_JSON is: {:.50}""".format(str(page_JSON)))
    #json.load(
    #    fp,
    #    cls=None,
    #    object_hook=None,
    #    parse_float=None,
    #    parse_int=None,
    #    parse_constant=None,
    #    object_pairs_hook=None,
    #    **kw)
    # Deserialize 'fp'
    # (a '.read()'-supporting file-like object
    # containing a JSON document) to
    # a Python object
    # using this conversion table.
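    # A tiny hedged illustration of that deserialization on an inline
    # string ('json.loads' is the string-input twin of 'json.load'):
    #sample = json.loads('{"data": {"children": []}}')
    #assert sample["data"]["children"] == []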
    #page_Content_Parsed = (
    #    json.load(
    # An 'HTTPResponse' instance
    # wraps the `HTTP response` from the `server`.
    # It provides
    # access to
    # the `request headers` and
    # the `entity body`.
    # The response is
    # an `iterable` object and
    # can be used in a 'with' statement.
    #HTTPResponse.read([amt])
    # Reads and returns
    # the `response body`, or
    # up to the next 'amt' bytes.
    #        page_Content.read()
    #        page_Content.json()
    #    )
    #)
    # Receiving
    # a status '429' is
    # not an error;
    # it is the other server "kindly" asking you to
    # please stop spamming requests.
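    # A hedged sketch of honoring that: back off and retry once, using
    # the 'Retry-After' header when the server provides it ('time' would
    # need to be imported at the top of the script):
    #if page_Content.status_code == 429:
    #    time.sleep(int(page_Content.headers.get("Retry-After", 5)))
    #    page_Content = requests.get(reddit_page)
    #    page_JSON = page_Content.json()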
    #json_dict.get('data').get('children')[0].get('data')
    if page_JSON != {'error': 429}:
        content_Topics = (
            #page_JSON["data"]["children"]
            page_JSON.get('data').get('children')
        )
        # populate 'stories' with page data
        print("""populating 'stories' with page data ...""")
        # iterate over the array of objects
        #for item in page_Content_Parsed["data"]["children"]:
        for item in content_Topics:
            # side effect:
            #insert_one(document)
            # inserts a single document
            if item:
                stories.insert_one(item["data"])
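        # An alternative sketch: PyMongo's 'insert_many' stores all the
        # documents in one round trip (it raises on an empty list, so
        # only call it when there is something to insert):
        #docs = [item["data"] for item in content_Topics if item]
        #if docs:
        #    stories.insert_many(docs)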
# unit test
if __name__ == "__main__":
    # OK ?
    get_N_Insert_Page_Content()
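    # A quick hedged sanity check of what landed in MongoDB
    # ('count_documents' and 'find_one' are standard PyMongo collection
    # methods); commented out to keep the run side-effect-free:
    #check = pymongo.MongoClient('mongodb://localhost').reddit.stories
    #print(check.count_documents({}))
    #print(check.find_one())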