In [1]:
import webbrowser
webbrowser.open('https://www.youtube.com/watch?v=3Dg006VuOMk&list=RD3Dg006VuOMk&start_radio=1')
Out[1]:
True
In [2]:
!pip3 install nltk
Collecting nltk Using cached nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB) Requirement already satisfied: click in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (8.1.7) Requirement already satisfied: joblib in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (1.4.2) Requirement already satisfied: regex>=2021.8.3 in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (2024.7.24) Requirement already satisfied: tqdm in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from nltk) (4.66.5) Requirement already satisfied: colorama in c:\users\mehak\appdata\local\programs\python\python312\lib\site-packages (from click->nltk) (0.4.6) Using cached nltk-3.9.1-py3-none-any.whl (1.5 MB) Installing collected packages: nltk Successfully installed nltk-3.9.1
In [3]:
import nltk
nltk.download()
showing info https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/index.xml
Out[3]:
True
In [9]:
from nltk.tokenize import word_tokenize, sent_tokenize
sample_sentence="Hi I am Learning NLP with Itronix Solutions"
a=word_tokenize(sample_sentence)
print(a)
['Hi', 'I', 'am', 'Learning', 'NLP', 'with', 'Itronix', 'Solutions']
In [10]:
print(sample_sentence.split())
['Hi', 'I', 'am', 'Learning', 'NLP', 'with', 'Itronix', 'Solutions']
In [12]:
sample_sentence2="Hi I am Learning NLP with Mr. Karan Arora. My age is 27"
b=sent_tokenize(sample_sentence2)
print(b)
['Hi I am Learning NLP with Mr. Karan Arora.', 'My age is 27']
In [13]:
EXAMPLE_TEXT = "Hello Mr. Karan Arora, how are you doing today? The weather is great, and Python is awesome. The sky is blue. You shouldn't eat chicken."
print(sent_tokenize(EXAMPLE_TEXT))
['Hello Mr. Karan Arora, how are you doing today?', 'The weather is great, and Python is awesome.', 'The sky is blue.', "You shouldn't eat chicken."]
In [14]:
print(word_tokenize(EXAMPLE_TEXT))
['Hello', 'Mr.', 'Karan', 'Arora', ',', 'how', 'are', 'you', 'doing', 'today', '?', 'The', 'weather', 'is', 'great', ',', 'and', 'Python', 'is', 'awesome', '.', 'The', 'sky', 'is', 'blue', '.', 'You', 'should', "n't", 'eat', 'chicken', '.']
In [15]:
from nltk.tokenize import word_tokenize
from nltk.tokenize import TreebankWordTokenizer
from nltk.tokenize import WordPunctTokenizer
In [16]:
tok2=TreebankWordTokenizer() #use short form
tok3=WordPunctTokenizer()
sent="Hi my name is Karan"
print(word_tokenize(sent))
print(tok2.tokenize(sent)) #or TreebankWordTokenizer().tokenize(sent)
print(tok3.tokenize(sent))
['Hi', 'my', 'name', 'is', 'Karan'] ['Hi', 'my', 'name', 'is', 'Karan'] ['Hi', 'my', 'name', 'is', 'Karan']
In [17]:
sent2="I won't let you bring wine"
print(word_tokenize(sent2))
print(tok2.tokenize(sent2))
print(tok3.tokenize(sent2))
['I', 'wo', "n't", 'let', 'you', 'bring', 'wine'] ['I', 'wo', "n't", 'let', 'you', 'bring', 'wine'] ['I', 'won', "'", 't', 'let', 'you', 'bring', 'wine']
In [19]:
from nltk.tokenize import word_tokenize, sent_tokenize
sample_sentence="Hi I am Learning NLP with Itronix Solutions"
a=word_tokenize(sample_sentence)
print(a)
['Hi', 'I', 'am', 'Learning', 'NLP', 'with', 'Itronix', 'Solutions']
In [20]:
from nltk.corpus import stopwords
stop_words=set(stopwords.words("english"))
stop_words
Out[20]:
{'a', 'about', 'above', 'after', 'again', 'against', 'ain', 'all', 'am', 'an', 'and', 'any', 'are', 'aren', "aren't", 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'can', 'couldn', "couldn't", 'd', 'did', 'didn', "didn't", 'do', 'does', 'doesn', "doesn't", 'doing', 'don', "don't", 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'hadn', "hadn't", 'has', 'hasn', "hasn't", 'have', 'haven', "haven't", 'having', 'he', 'her', 'here', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'i', 'if', 'in', 'into', 'is', 'isn', "isn't", 'it', "it's", 'its', 'itself', 'just', 'll', 'm', 'ma', 'me', 'mightn', "mightn't", 'more', 'most', 'mustn', "mustn't", 'my', 'myself', 'needn', "needn't", 'no', 'nor', 'not', 'now', 'o', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'our', 'ours', 'ourselves', 'out', 'over', 'own', 're', 's', 'same', 'shan', "shan't", 'she', "she's", 'should', "should've", 'shouldn', "shouldn't", 'so', 'some', 'such', 't', 'than', 'that', "that'll", 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'there', 'these', 'they', 'this', 'those', 'through', 'to', 'too', 'under', 'until', 'up', 've', 'very', 'was', 'wasn', "wasn't", 'we', 'were', 'weren', "weren't", 'what', 'when', 'where', 'which', 'while', 'who', 'whom', 'why', 'will', 'with', 'won', "won't", 'wouldn', "wouldn't", 'y', 'you', "you'd", "you'll", "you're", "you've", 'your', 'yours', 'yourself', 'yourselves'}
In [22]:
for word in a:
if word not in stop_words:
print(word,end=" ")
Hi I Learning NLP Itronix Solutions
In [26]:
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
port=PorterStemmer()
words = ["program", "programs", "programer", "programing", "programers",'beautiful']
for w in words:
print(port.stem(w))
program program program program program beauti
In [27]:
from nltk.stem import LancasterStemmer
lstemmer=LancasterStemmer()
lstemmer.stem('beautiful')
Out[27]:
'beauty'
In [28]:
from nltk.stem import WordNetLemmatizer
lzr = WordNetLemmatizer()
print(lzr.lemmatize('working',pos='v')) #pos : part of speech
print("better :", lzr.lemmatize("better", pos ="a"))
work better : good
In [ ]: