Carlo D'Aloia/

Course Notes: Regular Expressions in Python


Course Notes

Use this workspace to take notes, store code snippets, or build your own interactive cheatsheet! For courses that use data, the datasets will be available in the datasets folder.

# Import any packages you want to use here

Take Notes

Add notes here about the concepts you've learned and code cells with code you want to keep.

Add your notes here

# Add your code snippets here
# Label that precedes the character count in the printed sentence
statement = "Number of characters in this review:"

# Count the characters in the movie review
length_string = len(movie)

# len() returns an int, so turn it into text before joining
to_string = str(length_string)

# Join label and count with a single space and print the result
print(" ".join([statement, to_string]))
# From movie1 keep the first 32 characters and everything from the
# 43rd character (index 42) onward
first_part, last_part = movie1[:32], movie1[42:]

# From movie2 take the 33rd through 42nd characters (indices 32-41)
middle_part = movie2[32:42]

# NOTE: the step that prints the concatenation and movie2 is not part of
# this snippet.
# Get the word: characters at indices 11 through 29 of the review
movie_title = movie[11:30]

# Obtain the palindrome candidate by reversing the word with a -1 step slice
palindrome = movie_title[::-1]

# Print the word if it's a palindrome (reads the same in both directions)
if movie_title == palindrome:
    print(movie_title)
# Work on a lowercase copy of the review
movie_lower = movie.lower()

# Drop any "$" characters from both ends of the text
movie_no_sign = movie_lower.strip("$")

# Break the text into words on runs of whitespace
movie_split = movie_no_sign.split()

# Root word: the second word without its final character
word_root = movie_split[1][:-1]
# Remove tags happening at the end.
# rstrip() treats its argument as a set of characters, so any trailing run
# of "<", "\", "i" and ">" is removed. The raw string keeps the backslash
# literal without relying on the invalid escape sequence "\i".
movie_tag = movie.rstrip(r"<\i>")

# Split the string using commas
movie_no_comma = movie_tag.split(",")

# Join the pieces back together, separated by single spaces
movie_join = " ".join(movie_no_comma)
# Break the raw text into one entry per newline-separated line
file_split = file.split("\n")

# NOTE: the step that prints file_split is not part of this snippet.

# Split every line into its comma-separated fields; substring_split is
# overwritten on each pass, so it ends up holding the last line's fields
for substring in file_split:
    substring_split = substring.split(",")
# Collapse repeated "actor" words in each review
for movie in movies:
    # find() returns -1 when "actor" does not occur within indices 37-41
    # (the end index 42 is exclusive)
    if movie.find("actor", 37, 42) == -1:
        print("Word not found")
    # Exactly two occurrences: replace the doubled word with one
    elif movie.count("actor") == 2:
        print(movie.replace("actor actor", "actor"))
    # Otherwise replace three occurrences with one
    else:
        print(movie.replace("actor actor actor", "actor"))
# Report where "money" first appears in each review
for movie in movies:
    try:
        # index() searches indices 12-50 (end index 51 is exclusive) and,
        # unlike find(), raises ValueError when the word is absent
        print(movie.index("money", 12, 51))
    except ValueError:
        print("substring not found")
# Turn the negation into an affirmation: every "isn't" becomes "is"
movies_no_negation = movies.replace("isn't", "is")

# Swap "important" for "insignificant" in the de-negated text
movies_antonym = movies_no_negation.replace("important", "insignificant")

# NOTE: the final print step is not part of this snippet.
# Pull two phrases out of the article by position and lowercase them
first_pos = wikipedia_article[3:19].lower()
second_pos = wikipedia_article[21:44].lower()

# First template: empty placeholders are filled in argument order
my_list.append("The tool {} is used in {}")

# Second template: explicit indices swap the two arguments
my_list.append("The tool {1} is used in {0}")

# Render every template in my_list with the same two arguments
for my_string in my_list:
    print(my_string.format(first_pos, second_pos))
courses = ['artificial intelligence', 'neural networks']

# Create a dictionary mapping each placeholder key to a course name
plan = {
    "field": courses[0],
    "tool": courses[1],
}

# Complete the placeholders accessing elements of field and tool keys in the data dictionary.
# Inside a format string, {data[field]} looks up the key "field" (written
# without quotes) on the argument passed as `data`.
my_message = "If you are interested in {data[field]}, you can take the course related to {data[tool]}"

# Use the plan dictionary to replace placeholders
print(my_message.format(data=plan))
# Import datetime
from datetime import datetime

# Assign the current local date and time to get_date
get_date = datetime.now()

# Add named placeholders with format specifiers: the text after the colon is
# a strftime-style pattern applied to the datetime passed as `today`
message = "Good morning. Today is {today:%B %d, %Y}. It's {today:%H:%M} ... time to work!"

# Use the format method replacing the placeholder with get_date
print(message.format(today=get_date))
 'field1': 'sexiest job',
 'field2': 'data is produced daily',
 'field3': 'Individuals',
 'fact1': 21,
 'fact2': 2500000000000000000,
 'fact3': 72.41415415151,
 'fact4': 1.09
# Complete the f-string: field3 is inserted as-is, fact3 is shown with two
# decimal places (.2f) and fact4 with one (.1f)
print(f"{field3} create around {fact3:.2f}% of the data but only {fact4:.1f}% is analyzed")    
'number1': 120,
 'number2': 7,
 'string1': '',
 'list_links': ['',
# Include both variables and the result of dividing them; the quotient
# number1/number2 is computed inside the f-string and shown with one decimal
print(f"{number1} tweets were downloaded in {number2} minutes indicating a speed of {number1/number2:.1f} tweets per min")

# Replace the substring https by an empty string; the method call is
# evaluated inside the f-string braces
print(f"{string1.replace('https','' )}")

# Divide the length of list by 120 and show it as a percentage rounded to
# two decimals. The 120 is hard-coded; presumably it is the total number of
# posts -- confirm against the exercise data.
print(f"Only {len(list_links)*100/120:.2f}% of the posts contain links")
'east': {'date': datetime.datetime(2007, 4, 20, 0, 0), 'price': 1232443},
 'west': {'date': datetime.datetime(2006, 5, 26, 0, 0), 'price': 1432673}
# Access values of date and price in east dictionary. The %m-%d-%Y after the
# colon is a strftime-style format spec, so east['date'] is presumably a
# datetime -- confirm. Keys use single quotes because the f-string itself is
# delimited with double quotes.
# NOTE(review): unlike the west sentence below, this one has no trailing period.
print(f"The price for a house in the east neighborhood was ${east['price']} in {east['date']:%m-%d-%Y}")
# Access values of date and price in west dictionary, formatted the same way
print(f"The price for a house in the west neighborhood was ${west['price']} in {west['date']:%m-%d-%Y}.")
'wikipedia_articles':                        tool                                        description
 0  Natural Language Toolkit  suite of libraries and programs for symbolic a...
 1                  TextBlob  Python library for processing textual data. It...
 2                    Gensim  Gensim is a robust open-source vector space mo...
 3   artificial intelligence  In computer science, artificial intelligence (...,
 'tool1': 'Natural Language Toolkit',
 'description1': 'suite of libraries and programs for symbolic and statistical natural language processing (NLP) for English written in the Python programming language. It was developed by Steven Bird and Edward Loper in the Department of Computer and Information Science at the University of Pennsylvania.',
 'tool2': 'TextBlob',
 'description2': 'Python library for processing textual data. It provides a simple API for diving into common natural language processing tasks such as part-of-speech tagging, noun phrase extraction, sentiment analysis, classification, translation, and more.',
 'tool3': 'Gensim',
 'description3': 'robust open-source vector space modeling and topic modeling toolkit implemented in Python. It uses NumPy, SciPy and optionally Cython for performance. Gensim is specifically designed to handle large text collections, using data streaming and efficient incremental algorithms, which differentiates it from most other scientific software packages that only target batch and in-memory processing.',
 # Import Template
from string import Template

# Template with two $-identifiers to be filled in
wikipedia = Template("$tool is a $description")

# Fill the template once per tool, passing the values as a mapping
print(wikipedia.substitute({"tool": tool1, "description": description1}))
print(wikipedia.substitute({"tool": tool2, "description": description2}))
print(wikipedia.substitute({"tool": tool3, "description": description3}))
# Import template
from string import Template

# Pick the first three entries of tools: tool name, fee, payment period
our_tool, our_fee, our_pay = tools[0], tools[1], tools[2]

# In a Template, "$$" yields a literal dollar sign and "${pay}" uses braces
# so the identifier can be followed directly by "ly"
course = Template("We are offering a 3-month beginner course on $tool just for $$ $fee ${pay}ly")

# Fill the three identifiers and print the sentence
print(course.substitute(tool=our_tool, fee=our_fee, pay=our_pay))
'answers': {'answer1': 'I really like the app. But there are some features that can be improved'}
# Import template
from string import Template

# Complete template string using identifiers
the_answers = Template("Check your answer 1: $answer1, and your answer 2: $answer2")

# Use safe_substitute to replace identifiers. Unlike substitute(), it leaves
# an identifier with no matching key in the output unchanged instead of
# raising KeyError; the handler below is kept as in the original notes.
try:
    print(the_answers.safe_substitute(answers))
except KeyError:
    print("Missing information")