NLP – Practically

NLTK

In [1]:

import webbrowser
webbrowser.open('https://www.youtube.com/watch?v=3Dg006VuOMk&list=RD3Dg006VuOMk&start_radio=1')

Out[1]:

True

In [2]:

!pip3 install nltk

Collecting nltk
  Using cached nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Requirement already satisfied: click in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (8.1.7)
Requirement already satisfied: joblib in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (1.4.2)
Requirement already satisfied: regex>=2021.8.3 in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (2024.7.24)
Requirement already satisfied: tqdm in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (4.66.5)
Requirement already satisfied: colorama in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from click->nltk) (0.4.6)
Using cached nltk-3.9.1-py3-none-any.whl (1.5 MB)
Installing collected packages: nltk
Successfully installed nltk-3.9.1

In [3]:

import nltk
nltk.download()

showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml

Out[3]:

True

In [9]:

from nltk.tokenize import word_tokenize, sent_tokenize
sample_sentence="Hi I am Learning NLP with Itronix Solutions"
a=word_tokenize(sample_sentence)
print(a)

['Hi', 'I', 'am', 'Learning', 'NLP', 'with', 'Itronix', 'Solutions']

In [10]:

print(sample_sentence.split())

['Hi', 'I', 'am', 'Learning', 'NLP', 'with', 'Itronix', 'Solutions']

In [12]:

sample_sentence2="Hi I am Learning NLP with Mr. Karan Arora. My age is 27"
b=sent_tokenize(sample_sentence2)
print(b)

['Hi I am Learning NLP with Mr. Karan Arora.', 'My age is 27']

In [13]:

EXAMPLE_TEXT = "Hello Mr. Karan Arora, how are you doing today? The weather is great, and Python is awesome. The sky is blue. You shouldn't eat chicken."

print(sent_tokenize(EXAMPLE_TEXT))

['Hello Mr. Karan Arora, how are you doing today?', 'The weather is great, and Python is awesome.', 'The sky is blue.', "You shouldn't eat chicken."]

In [14]:

print(word_tokenize(EXAMPLE_TEXT))

['Hello', 'Mr.', 'Karan', 'Arora', ',', 'how', 'are', 'you', 'doing', 'today', '?', 'The', 'weather', 'is', 'great', ',', 'and', 'Python', 'is', 'awesome', '.', 'The', 'sky', 'is', 'blue', '.', 'You', 'should', "n't", 'eat', 'chicken', '.']

In [15]:

from nltk.tokenize import word_tokenize
from nltk.tokenize import TreebankWordTokenizer
from nltk.tokenize import WordPunctTokenizer

In [16]:

tok2=TreebankWordTokenizer() #use short form
tok3=WordPunctTokenizer()
sent="Hi my name is Karan"
print(word_tokenize(sent))
print(tok2.tokenize(sent)) #or TreebankWordTokenizer().tokenize(sent)
print(tok3.tokenize(sent))

['Hi', 'my', 'name', 'is', 'Karan']
['Hi', 'my', 'name', 'is', 'Karan']
['Hi', 'my', 'name', 'is', 'Karan']

In [17]:

sent2="I won't let you bring wine"
print(word_tokenize(sent2))
print(tok2.tokenize(sent2))
print(tok3.tokenize(sent2))

['I', 'wo', "n't", 'let', 'you', 'bring', 'wine']
['I', 'wo', "n't", 'let', 'you', 'bring', 'wine']
['I', 'won', "'", 't', 'let', 'you', 'bring', 'wine']

In [19]:

from nltk.tokenize import word_tokenize, sent_tokenize
sample_sentence="Hi I am Learning NLP with Itronix Solutions"
a=word_tokenize(sample_sentence)
print(a)

['Hi', 'I', 'am', 'Learning', 'NLP', 'with', 'Itronix', 'Solutions']

In [20]:

from nltk.corpus import stopwords
stop_words=set(stopwords.words("english"))
stop_words

Out[20]:

{'a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 'her',
 'here',
 'hers',
 'herself',
 'him',
 'himself',
 'his',
 'how',
 'i',
 'if',
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it's",
 'its',
 'itself',
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'needn',
 "needn't",
 'no',
 'nor',
 'not',
 'now',
 'o',
 'of',
 'off',
 'on',
 'once',
 'only',
 'or',
 'other',
 'our',
 'ours',
 'ourselves',
 'out',
 'over',
 'own',
 're',
 's',
 'same',
 'shan',
 "shan't",
 'she',
 "she's",
 'should',
 "should've",
 'shouldn',
 "shouldn't",
 'so',
 'some',
 'such',
 't',
 'than',
 'that',
 "that'll",
 'the',
 'their',
 'theirs',
 'them',
 'themselves',
 'then',
 'there',
 'these',
 'they',
 'this',
 'those',
 'through',
 'to',
 'too',
 'under',
 'until',
 'up',
 've',
 'very',
 'was',
 'wasn',
 "wasn't",
 'we',
 'were',
 'weren',
 "weren't",
 'what',
 'when',
 'where',
 'which',
 'while',
 'who',
 'whom',
 'why',
 'will',
 'with',
 'won',
 "won't",
 'wouldn',
 "wouldn't",
 'y',
 'you',
 "you'd",
 "you'll",
 "you're",
 "you've",
 'your',
 'yours',
 'yourself',
 'yourselves'}

In [22]:

for word in a:
    if word not in stop_words:
        print(word,end=" ")

Hi I Learning NLP Itronix Solutions

In [26]:

from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
port=PorterStemmer()
words = ["program", "programs", "programer", "programing", "programers",'beautiful'] 
for w in words:
    print(port.stem(w))

program
program
program
program
program
beauti

In [27]:

from nltk.stem import LancasterStemmer
lstemmer=LancasterStemmer()
lstemmer.stem('beautiful')

Out[27]:

'beauty'

In [28]:

from nltk.stem import WordNetLemmatizer
lzr = WordNetLemmatizer()
print(lzr.lemmatize('working',pos='v')) #pos : part of speech
print("better :", lzr.lemmatize("better", pos ="a"))

work
better : good

In [ ]:

Machine Learning Tutorials, Courses and Certifications

NLP – Practically

Related Articles

Related

About Machine Learning

Check Also

Stop Words

Leave a Reply Cancel reply

Multiple Linear Regression:

Microsoft AI Classroom Series Assessment Answers

Polynomial Regression

Support Vector Regression

Decision Tree Regression

Python MySQL Order By

ChatGPT Certification

Solr 101 Cognitive class Exam Answers:-

Reactive Architecture: Introduction to Reactive Systems cognitive class Exam Answer:-

Pivot Tables – Data Science Tutorials

OpenCV Python Project for Bus Detection from an Image

OpenCV Python Project for Vehicle Detection From an Image

OpenCV Python Project for Vehicle Detection in a Video frame

Airline Quality Service

Airport Quality Service